Diffstat (limited to 'fs')
77 files changed, 2985 insertions, 2078 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 02a2cf61631..51545529637 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -21,8 +21,8 @@ | |||
21 | #include <linux/posix_acl_xattr.h> | 21 | #include <linux/posix_acl_xattr.h> |
22 | #include "xattr.h" | 22 | #include "xattr.h" |
23 | #include "acl.h" | 23 | #include "acl.h" |
24 | #include "v9fs_vfs.h" | ||
25 | #include "v9fs.h" | 24 | #include "v9fs.h" |
25 | #include "v9fs_vfs.h" | ||
26 | 26 | ||
27 | static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) | 27 | static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name) |
28 | { | 28 | { |
@@ -59,7 +59,8 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid) | |||
59 | struct v9fs_session_info *v9ses; | 59 | struct v9fs_session_info *v9ses; |
60 | 60 | ||
61 | v9ses = v9fs_inode2v9ses(inode); | 61 | v9ses = v9fs_inode2v9ses(inode); |
62 | if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { | 62 | if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) || |
63 | ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) { | ||
63 | set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL); | 64 | set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL); |
64 | set_cached_acl(inode, ACL_TYPE_ACCESS, NULL); | 65 | set_cached_acl(inode, ACL_TYPE_ACCESS, NULL); |
65 | return 0; | 66 | return 0; |
@@ -71,11 +72,15 @@ int v9fs_get_acl(struct inode *inode, struct p9_fid *fid) | |||
71 | if (!IS_ERR(dacl) && !IS_ERR(pacl)) { | 72 | if (!IS_ERR(dacl) && !IS_ERR(pacl)) { |
72 | set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl); | 73 | set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl); |
73 | set_cached_acl(inode, ACL_TYPE_ACCESS, pacl); | 74 | set_cached_acl(inode, ACL_TYPE_ACCESS, pacl); |
74 | posix_acl_release(dacl); | ||
75 | posix_acl_release(pacl); | ||
76 | } else | 75 | } else |
77 | retval = -EIO; | 76 | retval = -EIO; |
78 | 77 | ||
78 | if (!IS_ERR(dacl)) | ||
79 | posix_acl_release(dacl); | ||
80 | |||
81 | if (!IS_ERR(pacl)) | ||
82 | posix_acl_release(pacl); | ||
83 | |||
79 | return retval; | 84 | return retval; |
80 | } | 85 | } |
81 | 86 | ||
@@ -100,9 +105,10 @@ int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags) | |||
100 | return -ECHILD; | 105 | return -ECHILD; |
101 | 106 | ||
102 | v9ses = v9fs_inode2v9ses(inode); | 107 | v9ses = v9fs_inode2v9ses(inode); |
103 | if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) { | 108 | if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) || |
109 | ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) { | ||
104 | /* | 110 | /* |
105 | * On access = client mode get the acl | 111 | * On access = client and acl = on mode get the acl |
106 | * values from the server | 112 | * values from the server |
107 | */ | 113 | */ |
108 | return 0; | 114 | return 0; |
@@ -128,6 +134,10 @@ static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl) | |||
128 | struct inode *inode = dentry->d_inode; | 134 | struct inode *inode = dentry->d_inode; |
129 | 135 | ||
130 | set_cached_acl(inode, type, acl); | 136 | set_cached_acl(inode, type, acl); |
137 | |||
138 | if (!acl) | ||
139 | return 0; | ||
140 | |||
131 | /* Set a setxattr request to server */ | 141 | /* Set a setxattr request to server */ |
132 | size = posix_acl_xattr_size(acl->a_count); | 142 | size = posix_acl_xattr_size(acl->a_count); |
133 | buffer = kmalloc(size, GFP_KERNEL); | 143 | buffer = kmalloc(size, GFP_KERNEL); |
@@ -177,10 +187,8 @@ int v9fs_acl_chmod(struct dentry *dentry) | |||
177 | int v9fs_set_create_acl(struct dentry *dentry, | 187 | int v9fs_set_create_acl(struct dentry *dentry, |
178 | struct posix_acl *dpacl, struct posix_acl *pacl) | 188 | struct posix_acl *dpacl, struct posix_acl *pacl) |
179 | { | 189 | { |
180 | if (dpacl) | 190 | v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl); |
181 | v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl); | 191 | v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl); |
182 | if (pacl) | ||
183 | v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl); | ||
184 | posix_acl_release(dpacl); | 192 | posix_acl_release(dpacl); |
185 | posix_acl_release(pacl); | 193 | posix_acl_release(pacl); |
186 | return 0; | 194 | return 0; |
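Note: the acl.c hunk above also tightens the error path in v9fs_get_acl(): each ACL returned by __v9fs_get_acl() is now released individually when it is valid, so a partial failure no longer leaks the ACL that was fetched successfully, and an ERR_PTR value is never handed to posix_acl_release(). A condensed sketch of the resulting pattern, assuming the usual POSIX_ACL_XATTR_* xattr-name macros from posix_acl_xattr.h:

	struct posix_acl *dacl, *pacl;
	int retval = 0;

	dacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_DEFAULT);
	pacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_ACCESS);
	if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
		/* cache both; set_cached_acl() keeps its own reference */
		set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
		set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
	} else
		retval = -EIO;

	/* drop our reference on whichever ACL was actually obtained */
	if (!IS_ERR(dacl))
		posix_acl_release(dacl);
	if (!IS_ERR(pacl))
		posix_acl_release(pacl);
	return retval;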
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 0dbe0d139ac..5b335c5086a 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -33,67 +33,11 @@ | |||
33 | 33 | ||
34 | #define CACHETAG_LEN 11 | 34 | #define CACHETAG_LEN 11 |
35 | 35 | ||
36 | struct kmem_cache *vcookie_cache; | ||
37 | |||
38 | struct fscache_netfs v9fs_cache_netfs = { | 36 | struct fscache_netfs v9fs_cache_netfs = { |
39 | .name = "9p", | 37 | .name = "9p", |
40 | .version = 0, | 38 | .version = 0, |
41 | }; | 39 | }; |
42 | 40 | ||
43 | static void init_once(void *foo) | ||
44 | { | ||
45 | struct v9fs_cookie *vcookie = (struct v9fs_cookie *) foo; | ||
46 | vcookie->fscache = NULL; | ||
47 | vcookie->qid = NULL; | ||
48 | inode_init_once(&vcookie->inode); | ||
49 | } | ||
50 | |||
51 | /** | ||
52 | * v9fs_init_vcookiecache - initialize a cache for vcookies to maintain | ||
53 | * vcookie to inode mapping | ||
54 | * | ||
55 | * Returns 0 on success. | ||
56 | */ | ||
57 | |||
58 | static int v9fs_init_vcookiecache(void) | ||
59 | { | ||
60 | vcookie_cache = kmem_cache_create("vcookie_cache", | ||
61 | sizeof(struct v9fs_cookie), | ||
62 | 0, (SLAB_RECLAIM_ACCOUNT| | ||
63 | SLAB_MEM_SPREAD), | ||
64 | init_once); | ||
65 | if (!vcookie_cache) | ||
66 | return -ENOMEM; | ||
67 | |||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | /** | ||
72 | * v9fs_destroy_vcookiecache - destroy the cache of vcookies | ||
73 | * | ||
74 | */ | ||
75 | |||
76 | static void v9fs_destroy_vcookiecache(void) | ||
77 | { | ||
78 | kmem_cache_destroy(vcookie_cache); | ||
79 | } | ||
80 | |||
81 | int __v9fs_cache_register(void) | ||
82 | { | ||
83 | int ret; | ||
84 | ret = v9fs_init_vcookiecache(); | ||
85 | if (ret < 0) | ||
86 | return ret; | ||
87 | |||
88 | return fscache_register_netfs(&v9fs_cache_netfs); | ||
89 | } | ||
90 | |||
91 | void __v9fs_cache_unregister(void) | ||
92 | { | ||
93 | v9fs_destroy_vcookiecache(); | ||
94 | fscache_unregister_netfs(&v9fs_cache_netfs); | ||
95 | } | ||
96 | |||
97 | /** | 41 | /** |
98 | * v9fs_random_cachetag - Generate a random tag to be associated | 42 | * v9fs_random_cachetag - Generate a random tag to be associated |
99 | * with a new cache session. | 43 | * with a new cache session. |
@@ -133,9 +77,9 @@ static uint16_t v9fs_cache_session_get_key(const void *cookie_netfs_data, | |||
133 | } | 77 | } |
134 | 78 | ||
135 | const struct fscache_cookie_def v9fs_cache_session_index_def = { | 79 | const struct fscache_cookie_def v9fs_cache_session_index_def = { |
136 | .name = "9P.session", | 80 | .name = "9P.session", |
137 | .type = FSCACHE_COOKIE_TYPE_INDEX, | 81 | .type = FSCACHE_COOKIE_TYPE_INDEX, |
138 | .get_key = v9fs_cache_session_get_key, | 82 | .get_key = v9fs_cache_session_get_key, |
139 | }; | 83 | }; |
140 | 84 | ||
141 | void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) | 85 | void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) |
@@ -163,33 +107,33 @@ void v9fs_cache_session_put_cookie(struct v9fs_session_info *v9ses) | |||
163 | static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, | 107 | static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data, |
164 | void *buffer, uint16_t bufmax) | 108 | void *buffer, uint16_t bufmax) |
165 | { | 109 | { |
166 | const struct v9fs_cookie *vcookie = cookie_netfs_data; | 110 | const struct v9fs_inode *v9inode = cookie_netfs_data; |
167 | memcpy(buffer, &vcookie->qid->path, sizeof(vcookie->qid->path)); | 111 | memcpy(buffer, &v9inode->fscache_key->path, |
168 | 112 | sizeof(v9inode->fscache_key->path)); | |
169 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &vcookie->inode, | 113 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode, |
170 | vcookie->qid->path); | 114 | v9inode->fscache_key->path); |
171 | return sizeof(vcookie->qid->path); | 115 | return sizeof(v9inode->fscache_key->path); |
172 | } | 116 | } |
173 | 117 | ||
174 | static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, | 118 | static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, |
175 | uint64_t *size) | 119 | uint64_t *size) |
176 | { | 120 | { |
177 | const struct v9fs_cookie *vcookie = cookie_netfs_data; | 121 | const struct v9fs_inode *v9inode = cookie_netfs_data; |
178 | *size = i_size_read(&vcookie->inode); | 122 | *size = i_size_read(&v9inode->vfs_inode); |
179 | 123 | ||
180 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &vcookie->inode, | 124 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get attr %llu", &v9inode->vfs_inode, |
181 | *size); | 125 | *size); |
182 | } | 126 | } |
183 | 127 | ||
184 | static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, | 128 | static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data, |
185 | void *buffer, uint16_t buflen) | 129 | void *buffer, uint16_t buflen) |
186 | { | 130 | { |
187 | const struct v9fs_cookie *vcookie = cookie_netfs_data; | 131 | const struct v9fs_inode *v9inode = cookie_netfs_data; |
188 | memcpy(buffer, &vcookie->qid->version, sizeof(vcookie->qid->version)); | 132 | memcpy(buffer, &v9inode->fscache_key->version, |
189 | 133 | sizeof(v9inode->fscache_key->version)); | |
190 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &vcookie->inode, | 134 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode, |
191 | vcookie->qid->version); | 135 | v9inode->fscache_key->version); |
192 | return sizeof(vcookie->qid->version); | 136 | return sizeof(v9inode->fscache_key->version); |
193 | } | 137 | } |
194 | 138 | ||
195 | static enum | 139 | static enum |
@@ -197,13 +141,13 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data, | |||
197 | const void *buffer, | 141 | const void *buffer, |
198 | uint16_t buflen) | 142 | uint16_t buflen) |
199 | { | 143 | { |
200 | const struct v9fs_cookie *vcookie = cookie_netfs_data; | 144 | const struct v9fs_inode *v9inode = cookie_netfs_data; |
201 | 145 | ||
202 | if (buflen != sizeof(vcookie->qid->version)) | 146 | if (buflen != sizeof(v9inode->fscache_key->version)) |
203 | return FSCACHE_CHECKAUX_OBSOLETE; | 147 | return FSCACHE_CHECKAUX_OBSOLETE; |
204 | 148 | ||
205 | if (memcmp(buffer, &vcookie->qid->version, | 149 | if (memcmp(buffer, &v9inode->fscache_key->version, |
206 | sizeof(vcookie->qid->version))) | 150 | sizeof(v9inode->fscache_key->version))) |
207 | return FSCACHE_CHECKAUX_OBSOLETE; | 151 | return FSCACHE_CHECKAUX_OBSOLETE; |
208 | 152 | ||
209 | return FSCACHE_CHECKAUX_OKAY; | 153 | return FSCACHE_CHECKAUX_OKAY; |
@@ -211,7 +155,7 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data, | |||
211 | 155 | ||
212 | static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data) | 156 | static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data) |
213 | { | 157 | { |
214 | struct v9fs_cookie *vcookie = cookie_netfs_data; | 158 | struct v9fs_inode *v9inode = cookie_netfs_data; |
215 | struct pagevec pvec; | 159 | struct pagevec pvec; |
216 | pgoff_t first; | 160 | pgoff_t first; |
217 | int loop, nr_pages; | 161 | int loop, nr_pages; |
@@ -220,7 +164,7 @@ static void v9fs_cache_inode_now_uncached(void *cookie_netfs_data) | |||
220 | first = 0; | 164 | first = 0; |
221 | 165 | ||
222 | for (;;) { | 166 | for (;;) { |
223 | nr_pages = pagevec_lookup(&pvec, vcookie->inode.i_mapping, | 167 | nr_pages = pagevec_lookup(&pvec, v9inode->vfs_inode.i_mapping, |
224 | first, | 168 | first, |
225 | PAGEVEC_SIZE - pagevec_count(&pvec)); | 169 | PAGEVEC_SIZE - pagevec_count(&pvec)); |
226 | if (!nr_pages) | 170 | if (!nr_pages) |
@@ -249,115 +193,114 @@ const struct fscache_cookie_def v9fs_cache_inode_index_def = { | |||
249 | 193 | ||
250 | void v9fs_cache_inode_get_cookie(struct inode *inode) | 194 | void v9fs_cache_inode_get_cookie(struct inode *inode) |
251 | { | 195 | { |
252 | struct v9fs_cookie *vcookie; | 196 | struct v9fs_inode *v9inode; |
253 | struct v9fs_session_info *v9ses; | 197 | struct v9fs_session_info *v9ses; |
254 | 198 | ||
255 | if (!S_ISREG(inode->i_mode)) | 199 | if (!S_ISREG(inode->i_mode)) |
256 | return; | 200 | return; |
257 | 201 | ||
258 | vcookie = v9fs_inode2cookie(inode); | 202 | v9inode = V9FS_I(inode); |
259 | if (vcookie->fscache) | 203 | if (v9inode->fscache) |
260 | return; | 204 | return; |
261 | 205 | ||
262 | v9ses = v9fs_inode2v9ses(inode); | 206 | v9ses = v9fs_inode2v9ses(inode); |
263 | vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, | 207 | v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, |
264 | &v9fs_cache_inode_index_def, | 208 | &v9fs_cache_inode_index_def, |
265 | vcookie); | 209 | v9inode); |
266 | 210 | ||
267 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode, | 211 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p get cookie %p", inode, |
268 | vcookie->fscache); | 212 | v9inode->fscache); |
269 | } | 213 | } |
270 | 214 | ||
271 | void v9fs_cache_inode_put_cookie(struct inode *inode) | 215 | void v9fs_cache_inode_put_cookie(struct inode *inode) |
272 | { | 216 | { |
273 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | 217 | struct v9fs_inode *v9inode = V9FS_I(inode); |
274 | 218 | ||
275 | if (!vcookie->fscache) | 219 | if (!v9inode->fscache) |
276 | return; | 220 | return; |
277 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode, | 221 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p put cookie %p", inode, |
278 | vcookie->fscache); | 222 | v9inode->fscache); |
279 | 223 | ||
280 | fscache_relinquish_cookie(vcookie->fscache, 0); | 224 | fscache_relinquish_cookie(v9inode->fscache, 0); |
281 | vcookie->fscache = NULL; | 225 | v9inode->fscache = NULL; |
282 | } | 226 | } |
283 | 227 | ||
284 | void v9fs_cache_inode_flush_cookie(struct inode *inode) | 228 | void v9fs_cache_inode_flush_cookie(struct inode *inode) |
285 | { | 229 | { |
286 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | 230 | struct v9fs_inode *v9inode = V9FS_I(inode); |
287 | 231 | ||
288 | if (!vcookie->fscache) | 232 | if (!v9inode->fscache) |
289 | return; | 233 | return; |
290 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode, | 234 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p flush cookie %p", inode, |
291 | vcookie->fscache); | 235 | v9inode->fscache); |
292 | 236 | ||
293 | fscache_relinquish_cookie(vcookie->fscache, 1); | 237 | fscache_relinquish_cookie(v9inode->fscache, 1); |
294 | vcookie->fscache = NULL; | 238 | v9inode->fscache = NULL; |
295 | } | 239 | } |
296 | 240 | ||
297 | void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp) | 241 | void v9fs_cache_inode_set_cookie(struct inode *inode, struct file *filp) |
298 | { | 242 | { |
299 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | 243 | struct v9fs_inode *v9inode = V9FS_I(inode); |
300 | struct p9_fid *fid; | 244 | struct p9_fid *fid; |
301 | 245 | ||
302 | if (!vcookie->fscache) | 246 | if (!v9inode->fscache) |
303 | return; | 247 | return; |
304 | 248 | ||
305 | spin_lock(&vcookie->lock); | 249 | spin_lock(&v9inode->fscache_lock); |
306 | fid = filp->private_data; | 250 | fid = filp->private_data; |
307 | if ((filp->f_flags & O_ACCMODE) != O_RDONLY) | 251 | if ((filp->f_flags & O_ACCMODE) != O_RDONLY) |
308 | v9fs_cache_inode_flush_cookie(inode); | 252 | v9fs_cache_inode_flush_cookie(inode); |
309 | else | 253 | else |
310 | v9fs_cache_inode_get_cookie(inode); | 254 | v9fs_cache_inode_get_cookie(inode); |
311 | 255 | ||
312 | spin_unlock(&vcookie->lock); | 256 | spin_unlock(&v9inode->fscache_lock); |
313 | } | 257 | } |
314 | 258 | ||
315 | void v9fs_cache_inode_reset_cookie(struct inode *inode) | 259 | void v9fs_cache_inode_reset_cookie(struct inode *inode) |
316 | { | 260 | { |
317 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | 261 | struct v9fs_inode *v9inode = V9FS_I(inode); |
318 | struct v9fs_session_info *v9ses; | 262 | struct v9fs_session_info *v9ses; |
319 | struct fscache_cookie *old; | 263 | struct fscache_cookie *old; |
320 | 264 | ||
321 | if (!vcookie->fscache) | 265 | if (!v9inode->fscache) |
322 | return; | 266 | return; |
323 | 267 | ||
324 | old = vcookie->fscache; | 268 | old = v9inode->fscache; |
325 | 269 | ||
326 | spin_lock(&vcookie->lock); | 270 | spin_lock(&v9inode->fscache_lock); |
327 | fscache_relinquish_cookie(vcookie->fscache, 1); | 271 | fscache_relinquish_cookie(v9inode->fscache, 1); |
328 | 272 | ||
329 | v9ses = v9fs_inode2v9ses(inode); | 273 | v9ses = v9fs_inode2v9ses(inode); |
330 | vcookie->fscache = fscache_acquire_cookie(v9ses->fscache, | 274 | v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, |
331 | &v9fs_cache_inode_index_def, | 275 | &v9fs_cache_inode_index_def, |
332 | vcookie); | 276 | v9inode); |
333 | |||
334 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p", | 277 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p", |
335 | inode, old, vcookie->fscache); | 278 | inode, old, v9inode->fscache); |
336 | 279 | ||
337 | spin_unlock(&vcookie->lock); | 280 | spin_unlock(&v9inode->fscache_lock); |
338 | } | 281 | } |
339 | 282 | ||
340 | int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) | 283 | int __v9fs_fscache_release_page(struct page *page, gfp_t gfp) |
341 | { | 284 | { |
342 | struct inode *inode = page->mapping->host; | 285 | struct inode *inode = page->mapping->host; |
343 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | 286 | struct v9fs_inode *v9inode = V9FS_I(inode); |
344 | 287 | ||
345 | BUG_ON(!vcookie->fscache); | 288 | BUG_ON(!v9inode->fscache); |
346 | 289 | ||
347 | return fscache_maybe_release_page(vcookie->fscache, page, gfp); | 290 | return fscache_maybe_release_page(v9inode->fscache, page, gfp); |
348 | } | 291 | } |
349 | 292 | ||
350 | void __v9fs_fscache_invalidate_page(struct page *page) | 293 | void __v9fs_fscache_invalidate_page(struct page *page) |
351 | { | 294 | { |
352 | struct inode *inode = page->mapping->host; | 295 | struct inode *inode = page->mapping->host; |
353 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | 296 | struct v9fs_inode *v9inode = V9FS_I(inode); |
354 | 297 | ||
355 | BUG_ON(!vcookie->fscache); | 298 | BUG_ON(!v9inode->fscache); |
356 | 299 | ||
357 | if (PageFsCache(page)) { | 300 | if (PageFsCache(page)) { |
358 | fscache_wait_on_page_write(vcookie->fscache, page); | 301 | fscache_wait_on_page_write(v9inode->fscache, page); |
359 | BUG_ON(!PageLocked(page)); | 302 | BUG_ON(!PageLocked(page)); |
360 | fscache_uncache_page(vcookie->fscache, page); | 303 | fscache_uncache_page(v9inode->fscache, page); |
361 | } | 304 | } |
362 | } | 305 | } |
363 | 306 | ||
@@ -380,13 +323,13 @@ static void v9fs_vfs_readpage_complete(struct page *page, void *data, | |||
380 | int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) | 323 | int __v9fs_readpage_from_fscache(struct inode *inode, struct page *page) |
381 | { | 324 | { |
382 | int ret; | 325 | int ret; |
383 | const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | 326 | const struct v9fs_inode *v9inode = V9FS_I(inode); |
384 | 327 | ||
385 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); | 328 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); |
386 | if (!vcookie->fscache) | 329 | if (!v9inode->fscache) |
387 | return -ENOBUFS; | 330 | return -ENOBUFS; |
388 | 331 | ||
389 | ret = fscache_read_or_alloc_page(vcookie->fscache, | 332 | ret = fscache_read_or_alloc_page(v9inode->fscache, |
390 | page, | 333 | page, |
391 | v9fs_vfs_readpage_complete, | 334 | v9fs_vfs_readpage_complete, |
392 | NULL, | 335 | NULL, |
@@ -418,13 +361,13 @@ int __v9fs_readpages_from_fscache(struct inode *inode, | |||
418 | unsigned *nr_pages) | 361 | unsigned *nr_pages) |
419 | { | 362 | { |
420 | int ret; | 363 | int ret; |
421 | const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | 364 | const struct v9fs_inode *v9inode = V9FS_I(inode); |
422 | 365 | ||
423 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages); | 366 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p pages %u", inode, *nr_pages); |
424 | if (!vcookie->fscache) | 367 | if (!v9inode->fscache) |
425 | return -ENOBUFS; | 368 | return -ENOBUFS; |
426 | 369 | ||
427 | ret = fscache_read_or_alloc_pages(vcookie->fscache, | 370 | ret = fscache_read_or_alloc_pages(v9inode->fscache, |
428 | mapping, pages, nr_pages, | 371 | mapping, pages, nr_pages, |
429 | v9fs_vfs_readpage_complete, | 372 | v9fs_vfs_readpage_complete, |
430 | NULL, | 373 | NULL, |
@@ -453,11 +396,22 @@ int __v9fs_readpages_from_fscache(struct inode *inode, | |||
453 | void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) | 396 | void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page) |
454 | { | 397 | { |
455 | int ret; | 398 | int ret; |
456 | const struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | 399 | const struct v9fs_inode *v9inode = V9FS_I(inode); |
457 | 400 | ||
458 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); | 401 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); |
459 | ret = fscache_write_page(vcookie->fscache, page, GFP_KERNEL); | 402 | ret = fscache_write_page(v9inode->fscache, page, GFP_KERNEL); |
460 | P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret); | 403 | P9_DPRINTK(P9_DEBUG_FSC, "ret = %d", ret); |
461 | if (ret != 0) | 404 | if (ret != 0) |
462 | v9fs_uncache_page(inode, page); | 405 | v9fs_uncache_page(inode, page); |
463 | } | 406 | } |
407 | |||
408 | /* | ||
409 | * wait for a page to complete writing to the cache | ||
410 | */ | ||
411 | void __v9fs_fscache_wait_on_page_write(struct inode *inode, struct page *page) | ||
412 | { | ||
413 | const struct v9fs_inode *v9inode = V9FS_I(inode); | ||
414 | P9_DPRINTK(P9_DEBUG_FSC, "inode %p page %p", inode, page); | ||
415 | if (PageFsCache(page)) | ||
416 | fscache_wait_on_page_write(v9inode->fscache, page); | ||
417 | } | ||
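Note: with struct v9fs_cookie removed, the per-inode fscache state now lives directly in struct v9fs_inode (see the v9fs.h hunks further down), and the v9fs_inode itself is registered as the netfs private data, so every cookie callback gets that pointer back as cookie_netfs_data. A condensed sketch of the round trip, using the field names from the hunks above (example_get_key stands in for v9fs_cache_inode_get_key):

	/* acquisition: the third argument becomes cookie_netfs_data */
	v9inode->fscache = fscache_acquire_cookie(v9ses->fscache,
						  &v9fs_cache_inode_index_def,
						  v9inode);

	/* callback side: the same pointer comes back and is simply cast */
	static uint16_t example_get_key(const void *cookie_netfs_data,
					void *buffer, uint16_t bufmax)
	{
		const struct v9fs_inode *v9inode = cookie_netfs_data;

		memcpy(buffer, &v9inode->fscache_key->path,
		       sizeof(v9inode->fscache_key->path));
		return sizeof(v9inode->fscache_key->path);
	}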
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index a94192bfaee..049507a5b01 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -25,20 +25,6 @@ | |||
25 | #include <linux/fscache.h> | 25 | #include <linux/fscache.h> |
26 | #include <linux/spinlock.h> | 26 | #include <linux/spinlock.h> |
27 | 27 | ||
28 | extern struct kmem_cache *vcookie_cache; | ||
29 | |||
30 | struct v9fs_cookie { | ||
31 | spinlock_t lock; | ||
32 | struct inode inode; | ||
33 | struct fscache_cookie *fscache; | ||
34 | struct p9_qid *qid; | ||
35 | }; | ||
36 | |||
37 | static inline struct v9fs_cookie *v9fs_inode2cookie(const struct inode *inode) | ||
38 | { | ||
39 | return container_of(inode, struct v9fs_cookie, inode); | ||
40 | } | ||
41 | |||
42 | extern struct fscache_netfs v9fs_cache_netfs; | 28 | extern struct fscache_netfs v9fs_cache_netfs; |
43 | extern const struct fscache_cookie_def v9fs_cache_session_index_def; | 29 | extern const struct fscache_cookie_def v9fs_cache_session_index_def; |
44 | extern const struct fscache_cookie_def v9fs_cache_inode_index_def; | 30 | extern const struct fscache_cookie_def v9fs_cache_inode_index_def; |
@@ -64,23 +50,8 @@ extern int __v9fs_readpages_from_fscache(struct inode *inode, | |||
64 | struct list_head *pages, | 50 | struct list_head *pages, |
65 | unsigned *nr_pages); | 51 | unsigned *nr_pages); |
66 | extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page); | 52 | extern void __v9fs_readpage_to_fscache(struct inode *inode, struct page *page); |
67 | 53 | extern void __v9fs_fscache_wait_on_page_write(struct inode *inode, | |
68 | 54 | struct page *page); | |
69 | /** | ||
70 | * v9fs_cache_register - Register v9fs file system with the cache | ||
71 | */ | ||
72 | static inline int v9fs_cache_register(void) | ||
73 | { | ||
74 | return __v9fs_cache_register(); | ||
75 | } | ||
76 | |||
77 | /** | ||
78 | * v9fs_cache_unregister - Unregister v9fs from the cache | ||
79 | */ | ||
80 | static inline void v9fs_cache_unregister(void) | ||
81 | { | ||
82 | __v9fs_cache_unregister(); | ||
83 | } | ||
84 | 55 | ||
85 | static inline int v9fs_fscache_release_page(struct page *page, | 56 | static inline int v9fs_fscache_release_page(struct page *page, |
86 | gfp_t gfp) | 57 | gfp_t gfp) |
@@ -117,28 +88,27 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode, | |||
117 | 88 | ||
118 | static inline void v9fs_uncache_page(struct inode *inode, struct page *page) | 89 | static inline void v9fs_uncache_page(struct inode *inode, struct page *page) |
119 | { | 90 | { |
120 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | 91 | struct v9fs_inode *v9inode = V9FS_I(inode); |
121 | fscache_uncache_page(vcookie->fscache, page); | 92 | fscache_uncache_page(v9inode->fscache, page); |
122 | BUG_ON(PageFsCache(page)); | 93 | BUG_ON(PageFsCache(page)); |
123 | } | 94 | } |
124 | 95 | ||
125 | static inline void v9fs_vcookie_set_qid(struct inode *inode, | 96 | static inline void v9fs_fscache_set_key(struct inode *inode, |
126 | struct p9_qid *qid) | 97 | struct p9_qid *qid) |
127 | { | 98 | { |
128 | struct v9fs_cookie *vcookie = v9fs_inode2cookie(inode); | 99 | struct v9fs_inode *v9inode = V9FS_I(inode); |
129 | spin_lock(&vcookie->lock); | 100 | spin_lock(&v9inode->fscache_lock); |
130 | vcookie->qid = qid; | 101 | v9inode->fscache_key = qid; |
131 | spin_unlock(&vcookie->lock); | 102 | spin_unlock(&v9inode->fscache_lock); |
132 | } | 103 | } |
133 | 104 | ||
134 | #else /* CONFIG_9P_FSCACHE */ | 105 | static inline void v9fs_fscache_wait_on_page_write(struct inode *inode, |
135 | 106 | struct page *page) | |
136 | static inline int v9fs_cache_register(void) | ||
137 | { | 107 | { |
138 | return 1; | 108 | return __v9fs_fscache_wait_on_page_write(inode, page); |
139 | } | 109 | } |
140 | 110 | ||
141 | static inline void v9fs_cache_unregister(void) {} | 111 | #else /* CONFIG_9P_FSCACHE */ |
142 | 112 | ||
143 | static inline int v9fs_fscache_release_page(struct page *page, | 113 | static inline int v9fs_fscache_release_page(struct page *page, |
144 | gfp_t gfp) { | 114 | gfp_t gfp) { |
@@ -168,9 +138,11 @@ static inline void v9fs_readpage_to_fscache(struct inode *inode, | |||
168 | static inline void v9fs_uncache_page(struct inode *inode, struct page *page) | 138 | static inline void v9fs_uncache_page(struct inode *inode, struct page *page) |
169 | {} | 139 | {} |
170 | 140 | ||
171 | static inline void v9fs_vcookie_set_qid(struct inode *inode, | 141 | static inline void v9fs_fscache_wait_on_page_write(struct inode *inode, |
172 | struct p9_qid *qid) | 142 | struct page *page) |
173 | {} | 143 | { |
144 | return; | ||
145 | } | ||
174 | 146 | ||
175 | #endif /* CONFIG_9P_FSCACHE */ | 147 | #endif /* CONFIG_9P_FSCACHE */ |
176 | #endif /* _9P_CACHE_H */ | 148 | #endif /* _9P_CACHE_H */ |
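Note: cache.h keeps the usual pattern of a real static-inline wrapper when CONFIG_9P_FSCACHE is set and an empty stub otherwise, so call sites such as v9fs_launder_page() in vfs_addr.c need no #ifdef of their own. The new v9fs_fscache_wait_on_page_write() wrapper added above follows the same shape; condensed:

	#ifdef CONFIG_9P_FSCACHE
	static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
							   struct page *page)
	{
		__v9fs_fscache_wait_on_page_write(inode, page);
	}
	#else
	static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
							   struct page *page)
	{
	}
	#endif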
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index b00223c99d7..cd63e002d82 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -125,46 +125,17 @@ err_out: | |||
125 | return -ENOMEM; | 125 | return -ENOMEM; |
126 | } | 126 | } |
127 | 127 | ||
128 | /** | 128 | static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry, |
129 | * v9fs_fid_lookup - lookup for a fid, try to walk if not found | 129 | uid_t uid, int any) |
130 | * @dentry: dentry to look for fid in | ||
131 | * | ||
132 | * Look for a fid in the specified dentry for the current user. | ||
133 | * If no fid is found, try to create one walking from a fid from the parent | ||
134 | * dentry (if it has one), or the root dentry. If the user hasn't accessed | ||
135 | * the fs yet, attach now and walk from the root. | ||
136 | */ | ||
137 | |||
138 | struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) | ||
139 | { | 130 | { |
140 | int i, n, l, clone, any, access; | ||
141 | u32 uid; | ||
142 | struct p9_fid *fid, *old_fid = NULL; | ||
143 | struct dentry *ds; | 131 | struct dentry *ds; |
144 | struct v9fs_session_info *v9ses; | ||
145 | char **wnames, *uname; | 132 | char **wnames, *uname; |
133 | int i, n, l, clone, access; | ||
134 | struct v9fs_session_info *v9ses; | ||
135 | struct p9_fid *fid, *old_fid = NULL; | ||
146 | 136 | ||
147 | v9ses = v9fs_inode2v9ses(dentry->d_inode); | 137 | v9ses = v9fs_inode2v9ses(dentry->d_inode); |
148 | access = v9ses->flags & V9FS_ACCESS_MASK; | 138 | access = v9ses->flags & V9FS_ACCESS_MASK; |
149 | switch (access) { | ||
150 | case V9FS_ACCESS_SINGLE: | ||
151 | case V9FS_ACCESS_USER: | ||
152 | case V9FS_ACCESS_CLIENT: | ||
153 | uid = current_fsuid(); | ||
154 | any = 0; | ||
155 | break; | ||
156 | |||
157 | case V9FS_ACCESS_ANY: | ||
158 | uid = v9ses->uid; | ||
159 | any = 1; | ||
160 | break; | ||
161 | |||
162 | default: | ||
163 | uid = ~0; | ||
164 | any = 0; | ||
165 | break; | ||
166 | } | ||
167 | |||
168 | fid = v9fs_fid_find(dentry, uid, any); | 139 | fid = v9fs_fid_find(dentry, uid, any); |
169 | if (fid) | 140 | if (fid) |
170 | return fid; | 141 | return fid; |
@@ -250,6 +221,45 @@ err_out: | |||
250 | return fid; | 221 | return fid; |
251 | } | 222 | } |
252 | 223 | ||
224 | /** | ||
225 | * v9fs_fid_lookup - lookup for a fid, try to walk if not found | ||
226 | * @dentry: dentry to look for fid in | ||
227 | * | ||
228 | * Look for a fid in the specified dentry for the current user. | ||
229 | * If no fid is found, try to create one walking from a fid from the parent | ||
230 | * dentry (if it has one), or the root dentry. If the user hasn't accessed | ||
231 | * the fs yet, attach now and walk from the root. | ||
232 | */ | ||
233 | |||
234 | struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) | ||
235 | { | ||
236 | uid_t uid; | ||
237 | int any, access; | ||
238 | struct v9fs_session_info *v9ses; | ||
239 | |||
240 | v9ses = v9fs_inode2v9ses(dentry->d_inode); | ||
241 | access = v9ses->flags & V9FS_ACCESS_MASK; | ||
242 | switch (access) { | ||
243 | case V9FS_ACCESS_SINGLE: | ||
244 | case V9FS_ACCESS_USER: | ||
245 | case V9FS_ACCESS_CLIENT: | ||
246 | uid = current_fsuid(); | ||
247 | any = 0; | ||
248 | break; | ||
249 | |||
250 | case V9FS_ACCESS_ANY: | ||
251 | uid = v9ses->uid; | ||
252 | any = 1; | ||
253 | break; | ||
254 | |||
255 | default: | ||
256 | uid = ~0; | ||
257 | any = 0; | ||
258 | break; | ||
259 | } | ||
260 | return v9fs_fid_lookup_with_uid(dentry, uid, any); | ||
261 | } | ||
262 | |||
253 | struct p9_fid *v9fs_fid_clone(struct dentry *dentry) | 263 | struct p9_fid *v9fs_fid_clone(struct dentry *dentry) |
254 | { | 264 | { |
255 | struct p9_fid *fid, *ret; | 265 | struct p9_fid *fid, *ret; |
@@ -261,3 +271,39 @@ struct p9_fid *v9fs_fid_clone(struct dentry *dentry) | |||
261 | ret = p9_client_walk(fid, 0, NULL, 1); | 271 | ret = p9_client_walk(fid, 0, NULL, 1); |
262 | return ret; | 272 | return ret; |
263 | } | 273 | } |
274 | |||
275 | static struct p9_fid *v9fs_fid_clone_with_uid(struct dentry *dentry, uid_t uid) | ||
276 | { | ||
277 | struct p9_fid *fid, *ret; | ||
278 | |||
279 | fid = v9fs_fid_lookup_with_uid(dentry, uid, 0); | ||
280 | if (IS_ERR(fid)) | ||
281 | return fid; | ||
282 | |||
283 | ret = p9_client_walk(fid, 0, NULL, 1); | ||
284 | return ret; | ||
285 | } | ||
286 | |||
287 | struct p9_fid *v9fs_writeback_fid(struct dentry *dentry) | ||
288 | { | ||
289 | int err; | ||
290 | struct p9_fid *fid; | ||
291 | |||
292 | fid = v9fs_fid_clone_with_uid(dentry, 0); | ||
293 | if (IS_ERR(fid)) | ||
294 | goto error_out; | ||
295 | /* | ||
296 | * The writeback fid is only used to write back | ||
297 | * dirty pages. We always open the fid in read-write | ||
298 | * mode so that a partial page write, which results in a | ||
299 | * page read, can work. | ||
300 | */ | ||
301 | err = p9_client_open(fid, O_RDWR); | ||
302 | if (err < 0) { | ||
303 | p9_client_clunk(fid); | ||
304 | fid = ERR_PTR(err); | ||
305 | goto error_out; | ||
306 | } | ||
307 | error_out: | ||
308 | return fid; | ||
309 | } | ||
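Note: v9fs_writeback_fid() above clones a fid with uid 0 and opens it O_RDWR, so dirty pages can be flushed (and the read needed to complete a partial-page write can be issued) regardless of which user's fid originally dirtied the page. The callers are not part of the hunks shown here; a hedged sketch of how an open path might stash the result in the new per-inode field:

	struct v9fs_inode *v9inode = V9FS_I(inode);

	if (!v9inode->writeback_fid) {
		struct p9_fid *fid = v9fs_writeback_fid(filp->f_path.dentry);

		if (IS_ERR(fid))
			return PTR_ERR(fid);
		/* assumed to stay around until the inode is evicted */
		v9inode->writeback_fid = fid;
	}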
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index c3bbd6af996..bb0b6e7f58f 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -19,7 +19,8 @@ | |||
19 | * Boston, MA 02111-1301 USA | 19 | * Boston, MA 02111-1301 USA |
20 | * | 20 | * |
21 | */ | 21 | */ |
22 | 22 | #ifndef FS_9P_FID_H | |
23 | #define FS_9P_FID_H | ||
23 | #include <linux/list.h> | 24 | #include <linux/list.h> |
24 | 25 | ||
25 | /** | 26 | /** |
@@ -45,3 +46,5 @@ struct v9fs_dentry { | |||
45 | struct p9_fid *v9fs_fid_lookup(struct dentry *dentry); | 46 | struct p9_fid *v9fs_fid_lookup(struct dentry *dentry); |
46 | struct p9_fid *v9fs_fid_clone(struct dentry *dentry); | 47 | struct p9_fid *v9fs_fid_clone(struct dentry *dentry); |
47 | int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); | 48 | int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); |
49 | struct p9_fid *v9fs_writeback_fid(struct dentry *dentry); | ||
50 | #endif | ||
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 2f77cd33ba8..c82b017f51f 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -39,6 +39,7 @@ | |||
39 | 39 | ||
40 | static DEFINE_SPINLOCK(v9fs_sessionlist_lock); | 40 | static DEFINE_SPINLOCK(v9fs_sessionlist_lock); |
41 | static LIST_HEAD(v9fs_sessionlist); | 41 | static LIST_HEAD(v9fs_sessionlist); |
42 | struct kmem_cache *v9fs_inode_cache; | ||
42 | 43 | ||
43 | /* | 44 | /* |
44 | * Option Parsing (code inspired by NFS code) | 45 | * Option Parsing (code inspired by NFS code) |
@@ -55,7 +56,7 @@ enum { | |||
55 | /* Cache options */ | 56 | /* Cache options */ |
56 | Opt_cache_loose, Opt_fscache, | 57 | Opt_cache_loose, Opt_fscache, |
57 | /* Access options */ | 58 | /* Access options */ |
58 | Opt_access, | 59 | Opt_access, Opt_posixacl, |
59 | /* Error token */ | 60 | /* Error token */ |
60 | Opt_err | 61 | Opt_err |
61 | }; | 62 | }; |
@@ -73,6 +74,7 @@ static const match_table_t tokens = { | |||
73 | {Opt_fscache, "fscache"}, | 74 | {Opt_fscache, "fscache"}, |
74 | {Opt_cachetag, "cachetag=%s"}, | 75 | {Opt_cachetag, "cachetag=%s"}, |
75 | {Opt_access, "access=%s"}, | 76 | {Opt_access, "access=%s"}, |
77 | {Opt_posixacl, "posixacl"}, | ||
76 | {Opt_err, NULL} | 78 | {Opt_err, NULL} |
77 | }; | 79 | }; |
78 | 80 | ||
@@ -194,15 +196,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
194 | else if (strcmp(s, "any") == 0) | 196 | else if (strcmp(s, "any") == 0) |
195 | v9ses->flags |= V9FS_ACCESS_ANY; | 197 | v9ses->flags |= V9FS_ACCESS_ANY; |
196 | else if (strcmp(s, "client") == 0) { | 198 | else if (strcmp(s, "client") == 0) { |
197 | #ifdef CONFIG_9P_FS_POSIX_ACL | ||
198 | v9ses->flags |= V9FS_ACCESS_CLIENT; | 199 | v9ses->flags |= V9FS_ACCESS_CLIENT; |
199 | #else | ||
200 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
201 | "access=client option not supported\n"); | ||
202 | kfree(s); | ||
203 | ret = -EINVAL; | ||
204 | goto free_and_return; | ||
205 | #endif | ||
206 | } else { | 200 | } else { |
207 | v9ses->flags |= V9FS_ACCESS_SINGLE; | 201 | v9ses->flags |= V9FS_ACCESS_SINGLE; |
208 | v9ses->uid = simple_strtoul(s, &e, 10); | 202 | v9ses->uid = simple_strtoul(s, &e, 10); |
@@ -212,6 +206,16 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) | |||
212 | kfree(s); | 206 | kfree(s); |
213 | break; | 207 | break; |
214 | 208 | ||
209 | case Opt_posixacl: | ||
210 | #ifdef CONFIG_9P_FS_POSIX_ACL | ||
211 | v9ses->flags |= V9FS_POSIX_ACL; | ||
212 | #else | ||
213 | P9_DPRINTK(P9_DEBUG_ERROR, | ||
214 | "Not defined CONFIG_9P_FS_POSIX_ACL. " | ||
215 | "Ignoring posixacl option\n"); | ||
216 | #endif | ||
217 | break; | ||
218 | |||
215 | default: | 219 | default: |
216 | continue; | 220 | continue; |
217 | } | 221 | } |
@@ -260,19 +264,12 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
260 | list_add(&v9ses->slist, &v9fs_sessionlist); | 264 | list_add(&v9ses->slist, &v9fs_sessionlist); |
261 | spin_unlock(&v9fs_sessionlist_lock); | 265 | spin_unlock(&v9fs_sessionlist_lock); |
262 | 266 | ||
263 | v9ses->flags = V9FS_ACCESS_USER; | ||
264 | strcpy(v9ses->uname, V9FS_DEFUSER); | 267 | strcpy(v9ses->uname, V9FS_DEFUSER); |
265 | strcpy(v9ses->aname, V9FS_DEFANAME); | 268 | strcpy(v9ses->aname, V9FS_DEFANAME); |
266 | v9ses->uid = ~0; | 269 | v9ses->uid = ~0; |
267 | v9ses->dfltuid = V9FS_DEFUID; | 270 | v9ses->dfltuid = V9FS_DEFUID; |
268 | v9ses->dfltgid = V9FS_DEFGID; | 271 | v9ses->dfltgid = V9FS_DEFGID; |
269 | 272 | ||
270 | rc = v9fs_parse_options(v9ses, data); | ||
271 | if (rc < 0) { | ||
272 | retval = rc; | ||
273 | goto error; | ||
274 | } | ||
275 | |||
276 | v9ses->clnt = p9_client_create(dev_name, data); | 273 | v9ses->clnt = p9_client_create(dev_name, data); |
277 | if (IS_ERR(v9ses->clnt)) { | 274 | if (IS_ERR(v9ses->clnt)) { |
278 | retval = PTR_ERR(v9ses->clnt); | 275 | retval = PTR_ERR(v9ses->clnt); |
@@ -281,10 +278,20 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
281 | goto error; | 278 | goto error; |
282 | } | 279 | } |
283 | 280 | ||
284 | if (p9_is_proto_dotl(v9ses->clnt)) | 281 | v9ses->flags = V9FS_ACCESS_USER; |
282 | |||
283 | if (p9_is_proto_dotl(v9ses->clnt)) { | ||
284 | v9ses->flags = V9FS_ACCESS_CLIENT; | ||
285 | v9ses->flags |= V9FS_PROTO_2000L; | 285 | v9ses->flags |= V9FS_PROTO_2000L; |
286 | else if (p9_is_proto_dotu(v9ses->clnt)) | 286 | } else if (p9_is_proto_dotu(v9ses->clnt)) { |
287 | v9ses->flags |= V9FS_PROTO_2000U; | 287 | v9ses->flags |= V9FS_PROTO_2000U; |
288 | } | ||
289 | |||
290 | rc = v9fs_parse_options(v9ses, data); | ||
291 | if (rc < 0) { | ||
292 | retval = rc; | ||
293 | goto error; | ||
294 | } | ||
288 | 295 | ||
289 | v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; | 296 | v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; |
290 | 297 | ||
@@ -306,6 +313,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, | |||
306 | v9ses->flags |= V9FS_ACCESS_ANY; | 313 | v9ses->flags |= V9FS_ACCESS_ANY; |
307 | v9ses->uid = ~0; | 314 | v9ses->uid = ~0; |
308 | } | 315 | } |
316 | if (!v9fs_proto_dotl(v9ses) || | ||
317 | !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) { | ||
318 | /* | ||
319 | * We support ACL checks on the client only if the protocol is | ||
320 | * 9P2000.L and access is V9FS_ACCESS_CLIENT. | ||
321 | */ | ||
322 | v9ses->flags &= ~V9FS_ACL_MASK; | ||
323 | } | ||
309 | 324 | ||
310 | fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0, | 325 | fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, ~0, |
311 | v9ses->aname); | 326 | v9ses->aname); |
@@ -467,6 +482,63 @@ static void v9fs_sysfs_cleanup(void) | |||
467 | kobject_put(v9fs_kobj); | 482 | kobject_put(v9fs_kobj); |
468 | } | 483 | } |
469 | 484 | ||
485 | static void v9fs_inode_init_once(void *foo) | ||
486 | { | ||
487 | struct v9fs_inode *v9inode = (struct v9fs_inode *)foo; | ||
488 | #ifdef CONFIG_9P_FSCACHE | ||
489 | v9inode->fscache = NULL; | ||
490 | v9inode->fscache_key = NULL; | ||
491 | #endif | ||
492 | inode_init_once(&v9inode->vfs_inode); | ||
493 | } | ||
494 | |||
495 | /** | ||
496 | * v9fs_init_inode_cache - initialize a cache for 9P | ||
497 | * Returns 0 on success. | ||
498 | */ | ||
499 | static int v9fs_init_inode_cache(void) | ||
500 | { | ||
501 | v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache", | ||
502 | sizeof(struct v9fs_inode), | ||
503 | 0, (SLAB_RECLAIM_ACCOUNT| | ||
504 | SLAB_MEM_SPREAD), | ||
505 | v9fs_inode_init_once); | ||
506 | if (!v9fs_inode_cache) | ||
507 | return -ENOMEM; | ||
508 | |||
509 | return 0; | ||
510 | } | ||
511 | |||
512 | /** | ||
513 | * v9fs_destroy_inode_cache - destroy the cache of 9P inodes | ||
514 | * | ||
515 | */ | ||
516 | static void v9fs_destroy_inode_cache(void) | ||
517 | { | ||
518 | kmem_cache_destroy(v9fs_inode_cache); | ||
519 | } | ||
520 | |||
521 | static int v9fs_cache_register(void) | ||
522 | { | ||
523 | int ret; | ||
524 | ret = v9fs_init_inode_cache(); | ||
525 | if (ret < 0) | ||
526 | return ret; | ||
527 | #ifdef CONFIG_9P_FSCACHE | ||
528 | return fscache_register_netfs(&v9fs_cache_netfs); | ||
529 | #else | ||
530 | return ret; | ||
531 | #endif | ||
532 | } | ||
533 | |||
534 | static void v9fs_cache_unregister(void) | ||
535 | { | ||
536 | v9fs_destroy_inode_cache(); | ||
537 | #ifdef CONFIG_9P_FSCACHE | ||
538 | fscache_unregister_netfs(&v9fs_cache_netfs); | ||
539 | #endif | ||
540 | } | ||
541 | |||
470 | /** | 542 | /** |
471 | * init_v9fs - Initialize module | 543 | * init_v9fs - Initialize module |
472 | * | 544 | * |
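Note: the old vcookie kmem_cache becomes a v9fs_inode_cache that is created unconditionally, with fscache netfs registration kept under CONFIG_9P_FSCACHE inside v9fs_cache_register(). The constructor only initializes fields that must be sane for every freshly constructed slab object; per-allocation fields are left to the super_operations allocation hook. The hook bodies are not in the hunks shown, so the following is only a sketch of the expected shape:

	struct inode *v9fs_alloc_inode(struct super_block *sb)
	{
		struct v9fs_inode *v9inode;

		v9inode = kmem_cache_alloc(v9fs_inode_cache, GFP_KERNEL);
		if (!v9inode)
			return NULL;
		v9inode->writeback_fid = NULL;
		v9inode->cache_validity = 0;
		return &v9inode->vfs_inode;
	}

	void v9fs_destroy_inode(struct inode *inode)
	{
		kmem_cache_free(v9fs_inode_cache, V9FS_I(inode));
	}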
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index c4b5d8864f0..bd8496db135 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -20,6 +20,9 @@ | |||
20 | * Boston, MA 02111-1301 USA | 20 | * Boston, MA 02111-1301 USA |
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | #ifndef FS_9P_V9FS_H | ||
24 | #define FS_9P_V9FS_H | ||
25 | |||
23 | #include <linux/backing-dev.h> | 26 | #include <linux/backing-dev.h> |
24 | 27 | ||
25 | /** | 28 | /** |
@@ -28,8 +31,10 @@ | |||
28 | * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions | 31 | * @V9FS_PROTO_2000L: whether or not to use 9P2000.l extensions |
29 | * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy | 32 | * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy |
30 | * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) | 33 | * @V9FS_ACCESS_USER: a new attach will be issued for every user (default) |
34 | * @V9FS_ACCESS_CLIENT: Just like user, but access check is performed on client. | ||
31 | * @V9FS_ACCESS_ANY: use a single attach for all users | 35 | * @V9FS_ACCESS_ANY: use a single attach for all users |
32 | * @V9FS_ACCESS_MASK: bit mask of different ACCESS options | 36 | * @V9FS_ACCESS_MASK: bit mask of different ACCESS options |
37 | * @V9FS_POSIX_ACL: POSIX ACLs are enforced | ||
33 | * | 38 | * |
34 | * Session flags reflect options selected by users at mount time | 39 | * Session flags reflect options selected by users at mount time |
35 | */ | 40 | */ |
@@ -37,13 +42,15 @@ | |||
37 | V9FS_ACCESS_USER | \ | 42 | V9FS_ACCESS_USER | \ |
38 | V9FS_ACCESS_CLIENT) | 43 | V9FS_ACCESS_CLIENT) |
39 | #define V9FS_ACCESS_MASK V9FS_ACCESS_ANY | 44 | #define V9FS_ACCESS_MASK V9FS_ACCESS_ANY |
45 | #define V9FS_ACL_MASK V9FS_POSIX_ACL | ||
40 | 46 | ||
41 | enum p9_session_flags { | 47 | enum p9_session_flags { |
42 | V9FS_PROTO_2000U = 0x01, | 48 | V9FS_PROTO_2000U = 0x01, |
43 | V9FS_PROTO_2000L = 0x02, | 49 | V9FS_PROTO_2000L = 0x02, |
44 | V9FS_ACCESS_SINGLE = 0x04, | 50 | V9FS_ACCESS_SINGLE = 0x04, |
45 | V9FS_ACCESS_USER = 0x08, | 51 | V9FS_ACCESS_USER = 0x08, |
46 | V9FS_ACCESS_CLIENT = 0x10 | 52 | V9FS_ACCESS_CLIENT = 0x10, |
53 | V9FS_POSIX_ACL = 0x20 | ||
47 | }; | 54 | }; |
48 | 55 | ||
49 | /* possible values of ->cache */ | 56 | /* possible values of ->cache */ |
@@ -109,8 +116,28 @@ struct v9fs_session_info { | |||
109 | struct list_head slist; /* list of sessions registered with v9fs */ | 116 | struct list_head slist; /* list of sessions registered with v9fs */ |
110 | struct backing_dev_info bdi; | 117 | struct backing_dev_info bdi; |
111 | struct rw_semaphore rename_sem; | 118 | struct rw_semaphore rename_sem; |
119 | struct p9_fid *root_fid; /* Used for file system sync */ | ||
120 | }; | ||
121 | |||
122 | /* cache_validity flags */ | ||
123 | #define V9FS_INO_INVALID_ATTR 0x01 | ||
124 | |||
125 | struct v9fs_inode { | ||
126 | #ifdef CONFIG_9P_FSCACHE | ||
127 | spinlock_t fscache_lock; | ||
128 | struct fscache_cookie *fscache; | ||
129 | struct p9_qid *fscache_key; | ||
130 | #endif | ||
131 | unsigned int cache_validity; | ||
132 | struct p9_fid *writeback_fid; | ||
133 | struct inode vfs_inode; | ||
112 | }; | 134 | }; |
113 | 135 | ||
136 | static inline struct v9fs_inode *V9FS_I(const struct inode *inode) | ||
137 | { | ||
138 | return container_of(inode, struct v9fs_inode, vfs_inode); | ||
139 | } | ||
140 | |||
114 | struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, | 141 | struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *, |
115 | char *); | 142 | char *); |
116 | extern void v9fs_session_close(struct v9fs_session_info *v9ses); | 143 | extern void v9fs_session_close(struct v9fs_session_info *v9ses); |
@@ -124,16 +151,15 @@ extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
124 | struct inode *new_dir, struct dentry *new_dentry); | 151 | struct inode *new_dir, struct dentry *new_dentry); |
125 | extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, | 152 | extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, |
126 | void *p); | 153 | void *p); |
127 | extern struct inode *v9fs_inode(struct v9fs_session_info *v9ses, | 154 | extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses, |
128 | struct p9_fid *fid, | 155 | struct p9_fid *fid, |
129 | struct super_block *sb); | 156 | struct super_block *sb); |
130 | |||
131 | extern const struct inode_operations v9fs_dir_inode_operations_dotl; | 157 | extern const struct inode_operations v9fs_dir_inode_operations_dotl; |
132 | extern const struct inode_operations v9fs_file_inode_operations_dotl; | 158 | extern const struct inode_operations v9fs_file_inode_operations_dotl; |
133 | extern const struct inode_operations v9fs_symlink_inode_operations_dotl; | 159 | extern const struct inode_operations v9fs_symlink_inode_operations_dotl; |
134 | extern struct inode *v9fs_inode_dotl(struct v9fs_session_info *v9ses, | 160 | extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, |
135 | struct p9_fid *fid, | 161 | struct p9_fid *fid, |
136 | struct super_block *sb); | 162 | struct super_block *sb); |
137 | 163 | ||
138 | /* other default globals */ | 164 | /* other default globals */ |
139 | #define V9FS_PORT 564 | 165 | #define V9FS_PORT 564 |
@@ -158,7 +184,7 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses) | |||
158 | } | 184 | } |
159 | 185 | ||
160 | /** | 186 | /** |
161 | * v9fs_inode_from_fid - Helper routine to populate an inode by | 187 | * v9fs_get_inode_from_fid - Helper routine to populate an inode by |
162 | * issuing an attribute request | 188 | * issuing an attribute request |
163 | * @v9ses: session information | 189 | * @v9ses: session information |
164 | * @fid: fid to issue attribute request for | 190 | * @fid: fid to issue attribute request for |
@@ -166,11 +192,12 @@ static inline int v9fs_proto_dotl(struct v9fs_session_info *v9ses) | |||
166 | * | 192 | * |
167 | */ | 193 | */ |
168 | static inline struct inode * | 194 | static inline struct inode * |
169 | v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, | 195 | v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, |
170 | struct super_block *sb) | 196 | struct super_block *sb) |
171 | { | 197 | { |
172 | if (v9fs_proto_dotl(v9ses)) | 198 | if (v9fs_proto_dotl(v9ses)) |
173 | return v9fs_inode_dotl(v9ses, fid, sb); | 199 | return v9fs_inode_from_fid_dotl(v9ses, fid, sb); |
174 | else | 200 | else |
175 | return v9fs_inode(v9ses, fid, sb); | 201 | return v9fs_inode_from_fid(v9ses, fid, sb); |
176 | } | 202 | } |
203 | #endif | ||
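Note: V9FS_I() relies on container_of(): every struct inode handed to the VFS is the vfs_inode member embedded inside a struct v9fs_inode, so subtracting the member offset recovers the containing object. A standalone userspace illustration of the same idiom (fake_inode and fake_v9inode are made-up stand-ins):

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct fake_inode   { int i_mode; };
	struct fake_v9inode { int cache_validity; struct fake_inode vfs_inode; };

	int main(void)
	{
		struct fake_v9inode v9 = { .cache_validity = 1 };
		struct fake_inode *inode = &v9.vfs_inode;	/* what the VFS sees */
		struct fake_v9inode *back =
			container_of(inode, struct fake_v9inode, vfs_inode);

		printf("%d\n", back->cache_validity);		/* prints 1 */
		return 0;
	}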
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index b789f8e597e..4014160903a 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -20,6 +20,8 @@ | |||
20 | * Boston, MA 02111-1301 USA | 20 | * Boston, MA 02111-1301 USA |
21 | * | 21 | * |
22 | */ | 22 | */ |
23 | #ifndef FS_9P_V9FS_VFS_H | ||
24 | #define FS_9P_V9FS_VFS_H | ||
23 | 25 | ||
24 | /* plan9 semantics are that created files are implicitly opened. | 26 | /* plan9 semantics are that created files are implicitly opened. |
25 | * But linux semantics are that you call create, then open. | 27 | * But linux semantics are that you call create, then open. |
@@ -36,6 +38,7 @@ | |||
36 | * unlink calls remove, which is an implicit clunk. So we have to track | 38 | * unlink calls remove, which is an implicit clunk. So we have to track |
37 | * that kind of thing so that we don't try to clunk a dead fid. | 39 | * that kind of thing so that we don't try to clunk a dead fid. |
38 | */ | 40 | */ |
41 | #define P9_LOCK_TIMEOUT (30*HZ) | ||
39 | 42 | ||
40 | extern struct file_system_type v9fs_fs_type; | 43 | extern struct file_system_type v9fs_fs_type; |
41 | extern const struct address_space_operations v9fs_addr_operations; | 44 | extern const struct address_space_operations v9fs_addr_operations; |
@@ -45,13 +48,15 @@ extern const struct file_operations v9fs_dir_operations; | |||
45 | extern const struct file_operations v9fs_dir_operations_dotl; | 48 | extern const struct file_operations v9fs_dir_operations_dotl; |
46 | extern const struct dentry_operations v9fs_dentry_operations; | 49 | extern const struct dentry_operations v9fs_dentry_operations; |
47 | extern const struct dentry_operations v9fs_cached_dentry_operations; | 50 | extern const struct dentry_operations v9fs_cached_dentry_operations; |
51 | extern const struct file_operations v9fs_cached_file_operations; | ||
52 | extern const struct file_operations v9fs_cached_file_operations_dotl; | ||
53 | extern struct kmem_cache *v9fs_inode_cache; | ||
48 | 54 | ||
49 | #ifdef CONFIG_9P_FSCACHE | ||
50 | struct inode *v9fs_alloc_inode(struct super_block *sb); | 55 | struct inode *v9fs_alloc_inode(struct super_block *sb); |
51 | void v9fs_destroy_inode(struct inode *inode); | 56 | void v9fs_destroy_inode(struct inode *inode); |
52 | #endif | ||
53 | |||
54 | struct inode *v9fs_get_inode(struct super_block *sb, int mode); | 57 | struct inode *v9fs_get_inode(struct super_block *sb, int mode); |
58 | int v9fs_init_inode(struct v9fs_session_info *v9ses, | ||
59 | struct inode *inode, int mode); | ||
55 | void v9fs_evict_inode(struct inode *inode); | 60 | void v9fs_evict_inode(struct inode *inode); |
56 | ino_t v9fs_qid2ino(struct p9_qid *qid); | 61 | ino_t v9fs_qid2ino(struct p9_qid *qid); |
57 | void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); | 62 | void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *); |
@@ -62,8 +67,19 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat); | |||
62 | int v9fs_uflags2omode(int uflags, int extended); | 67 | int v9fs_uflags2omode(int uflags, int extended); |
63 | 68 | ||
64 | ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); | 69 | ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); |
70 | ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64); | ||
65 | void v9fs_blank_wstat(struct p9_wstat *wstat); | 71 | void v9fs_blank_wstat(struct p9_wstat *wstat); |
66 | int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); | 72 | int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); |
67 | int v9fs_file_fsync_dotl(struct file *filp, int datasync); | 73 | int v9fs_file_fsync_dotl(struct file *filp, int datasync); |
68 | 74 | ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *, | |
69 | #define P9_LOCK_TIMEOUT (30*HZ) | 75 | const char __user *, size_t, loff_t *, int); |
76 | int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode); | ||
77 | int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode); | ||
78 | static inline void v9fs_invalidate_inode_attr(struct inode *inode) | ||
79 | { | ||
80 | struct v9fs_inode *v9inode; | ||
81 | v9inode = V9FS_I(inode); | ||
82 | v9inode->cache_validity |= V9FS_INO_INVALID_ATTR; | ||
83 | return; | ||
84 | } | ||
85 | #endif | ||
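Note: v9fs_invalidate_inode_attr() only marks the cached attributes stale by setting V9FS_INO_INVALID_ATTR; the actual refresh is left to v9fs_refresh_inode()/v9fs_refresh_inode_dotl(), whose bodies are not part of the hunks shown. A hedged sketch of a consumer of the flag (example_getattr_refresh is hypothetical):

	static int example_getattr_refresh(struct p9_fid *fid, struct inode *inode)
	{
		struct v9fs_inode *v9inode = V9FS_I(inode);

		if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR)
			return v9fs_refresh_inode(fid, inode);
		return 0;
	}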
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index b7f2a8e3863..2524e4cbb8e 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -39,16 +39,16 @@ | |||
39 | #include "v9fs.h" | 39 | #include "v9fs.h" |
40 | #include "v9fs_vfs.h" | 40 | #include "v9fs_vfs.h" |
41 | #include "cache.h" | 41 | #include "cache.h" |
42 | #include "fid.h" | ||
42 | 43 | ||
43 | /** | 44 | /** |
44 | * v9fs_vfs_readpage - read an entire page in from 9P | 45 | * v9fs_fid_readpage - read an entire page in from 9P |
45 | * | 46 | * |
46 | * @filp: file being read | 47 | * @fid: fid being read |
47 | * @page: structure to page | 48 | * @page: structure to page |
48 | * | 49 | * |
49 | */ | 50 | */ |
50 | 51 | static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page) | |
51 | static int v9fs_vfs_readpage(struct file *filp, struct page *page) | ||
52 | { | 52 | { |
53 | int retval; | 53 | int retval; |
54 | loff_t offset; | 54 | loff_t offset; |
@@ -67,7 +67,7 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page) | |||
67 | buffer = kmap(page); | 67 | buffer = kmap(page); |
68 | offset = page_offset(page); | 68 | offset = page_offset(page); |
69 | 69 | ||
70 | retval = v9fs_file_readn(filp, buffer, NULL, PAGE_CACHE_SIZE, offset); | 70 | retval = v9fs_fid_readn(fid, buffer, NULL, PAGE_CACHE_SIZE, offset); |
71 | if (retval < 0) { | 71 | if (retval < 0) { |
72 | v9fs_uncache_page(inode, page); | 72 | v9fs_uncache_page(inode, page); |
73 | goto done; | 73 | goto done; |
@@ -87,6 +87,19 @@ done: | |||
87 | } | 87 | } |
88 | 88 | ||
89 | /** | 89 | /** |
90 | * v9fs_vfs_readpage - read an entire page in from 9P | ||
91 | * | ||
92 | * @filp: file being read | ||
93 | * @page: structure to page | ||
94 | * | ||
95 | */ | ||
96 | |||
97 | static int v9fs_vfs_readpage(struct file *filp, struct page *page) | ||
98 | { | ||
99 | return v9fs_fid_readpage(filp->private_data, page); | ||
100 | } | ||
101 | |||
102 | /** | ||
90 | * v9fs_vfs_readpages - read a set of pages from 9P | 103 | * v9fs_vfs_readpages - read a set of pages from 9P |
91 | * | 104 | * |
92 | * @filp: file being read | 105 | * @filp: file being read |
@@ -124,7 +137,6 @@ static int v9fs_release_page(struct page *page, gfp_t gfp) | |||
124 | { | 137 | { |
125 | if (PagePrivate(page)) | 138 | if (PagePrivate(page)) |
126 | return 0; | 139 | return 0; |
127 | |||
128 | return v9fs_fscache_release_page(page, gfp); | 140 | return v9fs_fscache_release_page(page, gfp); |
129 | } | 141 | } |
130 | 142 | ||
@@ -137,20 +149,89 @@ static int v9fs_release_page(struct page *page, gfp_t gfp) | |||
137 | 149 | ||
138 | static void v9fs_invalidate_page(struct page *page, unsigned long offset) | 150 | static void v9fs_invalidate_page(struct page *page, unsigned long offset) |
139 | { | 151 | { |
152 | /* | ||
153 | * If called with zero offset, we should release | ||
154 | * the private state associated with the page | ||
155 | */ | ||
140 | if (offset == 0) | 156 | if (offset == 0) |
141 | v9fs_fscache_invalidate_page(page); | 157 | v9fs_fscache_invalidate_page(page); |
142 | } | 158 | } |
143 | 159 | ||
160 | static int v9fs_vfs_writepage_locked(struct page *page) | ||
161 | { | ||
162 | char *buffer; | ||
163 | int retval, len; | ||
164 | loff_t offset, size; | ||
165 | mm_segment_t old_fs; | ||
166 | struct v9fs_inode *v9inode; | ||
167 | struct inode *inode = page->mapping->host; | ||
168 | |||
169 | v9inode = V9FS_I(inode); | ||
170 | size = i_size_read(inode); | ||
171 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
172 | len = size & ~PAGE_CACHE_MASK; | ||
173 | else | ||
174 | len = PAGE_CACHE_SIZE; | ||
175 | |||
176 | set_page_writeback(page); | ||
177 | |||
178 | buffer = kmap(page); | ||
179 | offset = page_offset(page); | ||
180 | |||
181 | old_fs = get_fs(); | ||
182 | set_fs(get_ds()); | ||
183 | /* We should have writeback_fid always set */ | ||
184 | BUG_ON(!v9inode->writeback_fid); | ||
185 | |||
186 | retval = v9fs_file_write_internal(inode, | ||
187 | v9inode->writeback_fid, | ||
188 | (__force const char __user *)buffer, | ||
189 | len, &offset, 0); | ||
190 | if (retval > 0) | ||
191 | retval = 0; | ||
192 | |||
193 | set_fs(old_fs); | ||
194 | kunmap(page); | ||
195 | end_page_writeback(page); | ||
196 | return retval; | ||
197 | } | ||
198 | |||
199 | static int v9fs_vfs_writepage(struct page *page, struct writeback_control *wbc) | ||
200 | { | ||
201 | int retval; | ||
202 | |||
203 | retval = v9fs_vfs_writepage_locked(page); | ||
204 | if (retval < 0) { | ||
205 | if (retval == -EAGAIN) { | ||
206 | redirty_page_for_writepage(wbc, page); | ||
207 | retval = 0; | ||
208 | } else { | ||
209 | SetPageError(page); | ||
210 | mapping_set_error(page->mapping, retval); | ||
211 | } | ||
212 | } else | ||
213 | retval = 0; | ||
214 | |||
215 | unlock_page(page); | ||
216 | return retval; | ||
217 | } | ||
218 | |||
144 | /** | 219 | /** |
145 | * v9fs_launder_page - Writeback a dirty page | 220 | * v9fs_launder_page - Writeback a dirty page |
146 | * Since the writes go directly to the server, we simply return a 0 | ||
147 | * here to indicate success. | ||
148 | * | ||
149 | * Returns 0 on success. | 221 | * Returns 0 on success. |
150 | */ | 222 | */ |
151 | 223 | ||
152 | static int v9fs_launder_page(struct page *page) | 224 | static int v9fs_launder_page(struct page *page) |
153 | { | 225 | { |
226 | int retval; | ||
227 | struct inode *inode = page->mapping->host; | ||
228 | |||
229 | v9fs_fscache_wait_on_page_write(inode, page); | ||
230 | if (clear_page_dirty_for_io(page)) { | ||
231 | retval = v9fs_vfs_writepage_locked(page); | ||
232 | if (retval) | ||
233 | return retval; | ||
234 | } | ||
154 | return 0; | 235 | return 0; |
155 | } | 236 | } |
156 | 237 | ||
@@ -173,9 +254,15 @@ static int v9fs_launder_page(struct page *page) | |||
173 | * with an error. | 254 | * with an error. |
174 | * | 255 | * |
175 | */ | 256 | */ |
176 | ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | 257 | static ssize_t |
177 | loff_t pos, unsigned long nr_segs) | 258 | v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, |
259 | loff_t pos, unsigned long nr_segs) | ||
178 | { | 260 | { |
261 | /* | ||
262 | * FIXME | ||
263 | * Now that we do caching with cache mode enabled, we need | ||
264 | * to support direct IO. | ||
265 | */ | ||
179 | P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) " | 266 | P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) " |
180 | "off/no(%lld/%lu) EINVAL\n", | 267 | "off/no(%lld/%lu) EINVAL\n", |
181 | iocb->ki_filp->f_path.dentry->d_name.name, | 268 | iocb->ki_filp->f_path.dentry->d_name.name, |
@@ -183,11 +270,84 @@ ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, | |||
183 | 270 | ||
184 | return -EINVAL; | 271 | return -EINVAL; |
185 | } | 272 | } |
273 | |||
274 | static int v9fs_write_begin(struct file *filp, struct address_space *mapping, | ||
275 | loff_t pos, unsigned len, unsigned flags, | ||
276 | struct page **pagep, void **fsdata) | ||
277 | { | ||
278 | int retval = 0; | ||
279 | struct page *page; | ||
280 | struct v9fs_inode *v9inode; | ||
281 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
282 | struct inode *inode = mapping->host; | ||
283 | |||
284 | v9inode = V9FS_I(inode); | ||
285 | start: | ||
286 | page = grab_cache_page_write_begin(mapping, index, flags); | ||
287 | if (!page) { | ||
288 | retval = -ENOMEM; | ||
289 | goto out; | ||
290 | } | ||
291 | BUG_ON(!v9inode->writeback_fid); | ||
292 | if (PageUptodate(page)) | ||
293 | goto out; | ||
294 | |||
295 | if (len == PAGE_CACHE_SIZE) | ||
296 | goto out; | ||
297 | |||
298 | retval = v9fs_fid_readpage(v9inode->writeback_fid, page); | ||
299 | page_cache_release(page); | ||
300 | if (!retval) | ||
301 | goto start; | ||
302 | out: | ||
303 | *pagep = page; | ||
304 | return retval; | ||
305 | } | ||
306 | |||
307 | static int v9fs_write_end(struct file *filp, struct address_space *mapping, | ||
308 | loff_t pos, unsigned len, unsigned copied, | ||
309 | struct page *page, void *fsdata) | ||
310 | { | ||
311 | loff_t last_pos = pos + copied; | ||
312 | struct inode *inode = page->mapping->host; | ||
313 | |||
314 | if (unlikely(copied < len)) { | ||
315 | /* | ||
316 | * zero out the rest of the area | ||
317 | */ | ||
318 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | ||
319 | |||
320 | zero_user(page, from + copied, len - copied); | ||
321 | flush_dcache_page(page); | ||
322 | } | ||
323 | |||
324 | if (!PageUptodate(page)) | ||
325 | SetPageUptodate(page); | ||
326 | /* | ||
327 | * No need to use i_size_read() here, the i_size | ||
328 | * cannot change under us because we hold the i_mutex. | ||
329 | */ | ||
330 | if (last_pos > inode->i_size) { | ||
331 | inode_add_bytes(inode, last_pos - inode->i_size); | ||
332 | i_size_write(inode, last_pos); | ||
333 | } | ||
334 | set_page_dirty(page); | ||
335 | unlock_page(page); | ||
336 | page_cache_release(page); | ||
337 | |||
338 | return copied; | ||
339 | } | ||
340 | |||
341 | |||
186 | const struct address_space_operations v9fs_addr_operations = { | 342 | const struct address_space_operations v9fs_addr_operations = { |
187 | .readpage = v9fs_vfs_readpage, | 343 | .readpage = v9fs_vfs_readpage, |
188 | .readpages = v9fs_vfs_readpages, | 344 | .readpages = v9fs_vfs_readpages, |
189 | .releasepage = v9fs_release_page, | 345 | .set_page_dirty = __set_page_dirty_nobuffers, |
190 | .invalidatepage = v9fs_invalidate_page, | 346 | .writepage = v9fs_vfs_writepage, |
191 | .launder_page = v9fs_launder_page, | 347 | .write_begin = v9fs_write_begin, |
192 | .direct_IO = v9fs_direct_IO, | 348 | .write_end = v9fs_write_end, |
349 | .releasepage = v9fs_release_page, | ||
350 | .invalidatepage = v9fs_invalidate_page, | ||
351 | .launder_page = v9fs_launder_page, | ||
352 | .direct_IO = v9fs_direct_IO, | ||
193 | }; | 353 | }; |
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index 233b7d4ffe5..b6a3b9f7fe4 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c | |||
@@ -63,20 +63,15 @@ static int v9fs_dentry_delete(const struct dentry *dentry) | |||
63 | * v9fs_cached_dentry_delete - called when dentry refcount equals 0 | 63 | * v9fs_cached_dentry_delete - called when dentry refcount equals 0 |
64 | * @dentry: dentry in question | 64 | * @dentry: dentry in question |
65 | * | 65 | * |
66 | * Only return 1 if our inode is invalid. Only non-synthetic files | ||
67 | * (ones without mtime == 0) should be calling this function. | ||
68 | * | ||
69 | */ | 66 | */ |
70 | |||
71 | static int v9fs_cached_dentry_delete(const struct dentry *dentry) | 67 | static int v9fs_cached_dentry_delete(const struct dentry *dentry) |
72 | { | 68 | { |
73 | struct inode *inode = dentry->d_inode; | 69 | P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", |
74 | P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name, | 70 | dentry->d_name.name, dentry); |
75 | dentry); | ||
76 | 71 | ||
77 | if(!inode) | 72 | /* Don't cache negative dentries */ |
73 | if (!dentry->d_inode) | ||
78 | return 1; | 74 | return 1; |
79 | |||
80 | return 0; | 75 | return 0; |
81 | } | 76 | } |
82 | 77 | ||
@@ -105,7 +100,41 @@ static void v9fs_dentry_release(struct dentry *dentry) | |||
105 | } | 100 | } |
106 | } | 101 | } |
107 | 102 | ||
103 | static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) | ||
104 | { | ||
105 | struct p9_fid *fid; | ||
106 | struct inode *inode; | ||
107 | struct v9fs_inode *v9inode; | ||
108 | |||
109 | if (nd->flags & LOOKUP_RCU) | ||
110 | return -ECHILD; | ||
111 | |||
112 | inode = dentry->d_inode; | ||
113 | if (!inode) | ||
114 | goto out_valid; | ||
115 | |||
116 | v9inode = V9FS_I(inode); | ||
117 | if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) { | ||
118 | int retval; | ||
119 | struct v9fs_session_info *v9ses; | ||
120 | fid = v9fs_fid_lookup(dentry); | ||
121 | if (IS_ERR(fid)) | ||
122 | return PTR_ERR(fid); | ||
123 | |||
124 | v9ses = v9fs_inode2v9ses(inode); | ||
125 | if (v9fs_proto_dotl(v9ses)) | ||
126 | retval = v9fs_refresh_inode_dotl(fid, inode); | ||
127 | else | ||
128 | retval = v9fs_refresh_inode(fid, inode); | ||
129 | if (retval <= 0) | ||
130 | return retval; | ||
131 | } | ||
132 | out_valid: | ||
133 | return 1; | ||
134 | } | ||
135 | |||
108 | const struct dentry_operations v9fs_cached_dentry_operations = { | 136 | const struct dentry_operations v9fs_cached_dentry_operations = { |
137 | .d_revalidate = v9fs_lookup_revalidate, | ||
109 | .d_delete = v9fs_cached_dentry_delete, | 138 | .d_delete = v9fs_cached_dentry_delete, |
110 | .d_release = v9fs_dentry_release, | 139 | .d_release = v9fs_dentry_release, |
111 | }; | 140 | }; |
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index b84ebe8cefe..9c2bdda5cd9 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c | |||
@@ -295,7 +295,6 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) | |||
295 | P9_DPRINTK(P9_DEBUG_VFS, | 295 | P9_DPRINTK(P9_DEBUG_VFS, |
296 | "v9fs_dir_release: inode: %p filp: %p fid: %d\n", | 296 | "v9fs_dir_release: inode: %p filp: %p fid: %d\n", |
297 | inode, filp, fid ? fid->fid : -1); | 297 | inode, filp, fid ? fid->fid : -1); |
298 | filemap_write_and_wait(inode->i_mapping); | ||
299 | if (fid) | 298 | if (fid) |
300 | p9_client_clunk(fid); | 299 | p9_client_clunk(fid); |
301 | return 0; | 300 | return 0; |
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 240c3067439..78bcb97c342 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c | |||
@@ -44,8 +44,7 @@ | |||
44 | #include "fid.h" | 44 | #include "fid.h" |
45 | #include "cache.h" | 45 | #include "cache.h" |
46 | 46 | ||
47 | static const struct file_operations v9fs_cached_file_operations; | 47 | static const struct vm_operations_struct v9fs_file_vm_ops; |
48 | static const struct file_operations v9fs_cached_file_operations_dotl; | ||
49 | 48 | ||
50 | /** | 49 | /** |
51 | * v9fs_file_open - open a file (or directory) | 50 | * v9fs_file_open - open a file (or directory) |
@@ -57,11 +56,13 @@ static const struct file_operations v9fs_cached_file_operations_dotl; | |||
57 | int v9fs_file_open(struct inode *inode, struct file *file) | 56 | int v9fs_file_open(struct inode *inode, struct file *file) |
58 | { | 57 | { |
59 | int err; | 58 | int err; |
59 | struct v9fs_inode *v9inode; | ||
60 | struct v9fs_session_info *v9ses; | 60 | struct v9fs_session_info *v9ses; |
61 | struct p9_fid *fid; | 61 | struct p9_fid *fid; |
62 | int omode; | 62 | int omode; |
63 | 63 | ||
64 | P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); | 64 | P9_DPRINTK(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file); |
65 | v9inode = V9FS_I(inode); | ||
65 | v9ses = v9fs_inode2v9ses(inode); | 66 | v9ses = v9fs_inode2v9ses(inode); |
66 | if (v9fs_proto_dotl(v9ses)) | 67 | if (v9fs_proto_dotl(v9ses)) |
67 | omode = file->f_flags; | 68 | omode = file->f_flags; |
@@ -89,20 +90,30 @@ int v9fs_file_open(struct inode *inode, struct file *file) | |||
89 | } | 90 | } |
90 | 91 | ||
91 | file->private_data = fid; | 92 | file->private_data = fid; |
92 | if ((fid->qid.version) && (v9ses->cache)) { | 93 | if (v9ses->cache && !v9inode->writeback_fid) { |
93 | P9_DPRINTK(P9_DEBUG_VFS, "cached"); | 94 | /* |
94 | /* enable cached file options */ | 95 | * clone a fid and add it to writeback_fid |
95 | if(file->f_op == &v9fs_file_operations) | 96 | * we do it during open time instead of |
96 | file->f_op = &v9fs_cached_file_operations; | 97 | * page dirty time via write_begin/page_mkwrite |
97 | else if (file->f_op == &v9fs_file_operations_dotl) | 98 | * because we want the write-after-unlink use case |
98 | file->f_op = &v9fs_cached_file_operations_dotl; | 99 | * to work. |
99 | 100 | */ | |
101 | fid = v9fs_writeback_fid(file->f_path.dentry); | ||
102 | if (IS_ERR(fid)) { | ||
103 | err = PTR_ERR(fid); | ||
104 | goto out_error; | ||
105 | } | ||
106 | v9inode->writeback_fid = (void *) fid; | ||
107 | } | ||
100 | #ifdef CONFIG_9P_FSCACHE | 108 | #ifdef CONFIG_9P_FSCACHE |
109 | if (v9ses->cache) | ||
101 | v9fs_cache_inode_set_cookie(inode, file); | 110 | v9fs_cache_inode_set_cookie(inode, file); |
102 | #endif | 111 | #endif |
103 | } | ||
104 | |||
105 | return 0; | 112 | return 0; |
113 | out_error: | ||
114 | p9_client_clunk(file->private_data); | ||
115 | file->private_data = NULL; | ||
116 | return err; | ||
106 | } | 117 | } |
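The writeback_fid that v9fs_file_open() clones above exists for the write-after-unlink pattern its comment mentions: once the name is gone, a fresh fid can no longer be walked from the path, so the fid that later writeback will use has to be taken while the path is still valid. A user-space illustration of that pattern follows (illustration only; /tmp/v9fs-demo is an arbitrary path, not anything from this patch):

#define _XOPEN_SOURCE 700
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[6] = { 0 };
	int fd = open("/tmp/v9fs-demo", O_CREAT | O_RDWR | O_TRUNC, 0600);

	if (fd < 0)
		return 1;
	unlink("/tmp/v9fs-demo");		/* remove the name first ... */
	if (write(fd, "hello", 5) != 5)		/* ... then keep writing */
		return 1;
	if (pread(fd, buf, 5, 0) != 5)
		return 1;
	printf("read back: %s\n", buf);		/* prints "hello" */
	close(fd);
	return 0;
}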
107 | 118 | ||
108 | /** | 119 | /** |
@@ -335,25 +346,22 @@ out_err: | |||
335 | } | 346 | } |
336 | 347 | ||
337 | /** | 348 | /** |
338 | * v9fs_file_readn - read from a file | 349 | * v9fs_fid_readn - read from a fid |
339 | * @filp: file pointer to read | 350 | * @fid: fid to read |
340 | * @data: data buffer to read data into | 351 | * @data: data buffer to read data into |
341 | * @udata: user data buffer to read data into | 352 | * @udata: user data buffer to read data into |
342 | * @count: size of buffer | 353 | * @count: size of buffer |
343 | * @offset: offset at which to read data | 354 | * @offset: offset at which to read data |
344 | * | 355 | * |
345 | */ | 356 | */ |
346 | |||
347 | ssize_t | 357 | ssize_t |
348 | v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, | 358 | v9fs_fid_readn(struct p9_fid *fid, char *data, char __user *udata, u32 count, |
349 | u64 offset) | 359 | u64 offset) |
350 | { | 360 | { |
351 | int n, total, size; | 361 | int n, total, size; |
352 | struct p9_fid *fid = filp->private_data; | ||
353 | 362 | ||
354 | P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, | 363 | P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid, |
355 | (long long unsigned) offset, count); | 364 | (long long unsigned) offset, count); |
356 | |||
357 | n = 0; | 365 | n = 0; |
358 | total = 0; | 366 | total = 0; |
359 | size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; | 367 | size = fid->iounit ? fid->iounit : fid->clnt->msize - P9_IOHDRSZ; |
@@ -379,6 +387,22 @@ v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, | |||
379 | } | 387 | } |
380 | 388 | ||
381 | /** | 389 | /** |
390 | * v9fs_file_readn - read from a file | ||
391 | * @filp: file pointer to read | ||
392 | * @data: data buffer to read data into | ||
393 | * @udata: user data buffer to read data into | ||
394 | * @count: size of buffer | ||
395 | * @offset: offset at which to read data | ||
396 | * | ||
397 | */ | ||
398 | ssize_t | ||
399 | v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count, | ||
400 | u64 offset) | ||
401 | { | ||
402 | return v9fs_fid_readn(filp->private_data, data, udata, count, offset); | ||
403 | } | ||
404 | |||
405 | /** | ||
382 | * v9fs_file_read - read from a file | 406 | * v9fs_file_read - read from a file |
383 | * @filp: file pointer to read | 407 | * @filp: file pointer to read |
384 | * @udata: user data buffer to read data into | 408 | * @udata: user data buffer to read data into |
@@ -410,45 +434,22 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count, | |||
410 | return ret; | 434 | return ret; |
411 | } | 435 | } |
412 | 436 | ||
413 | /** | 437 | ssize_t |
414 | * v9fs_file_write - write to a file | 438 | v9fs_file_write_internal(struct inode *inode, struct p9_fid *fid, |
415 | * @filp: file pointer to write | 439 | const char __user *data, size_t count, |
416 | * @data: data buffer to write data from | 440 | loff_t *offset, int invalidate) |
417 | * @count: size of buffer | ||
418 | * @offset: offset at which to write data | ||
419 | * | ||
420 | */ | ||
421 | |||
422 | static ssize_t | ||
423 | v9fs_file_write(struct file *filp, const char __user * data, | ||
424 | size_t count, loff_t * offset) | ||
425 | { | 441 | { |
426 | ssize_t retval; | ||
427 | size_t total = 0; | ||
428 | int n; | 442 | int n; |
429 | struct p9_fid *fid; | 443 | loff_t i_size; |
444 | size_t total = 0; | ||
430 | struct p9_client *clnt; | 445 | struct p9_client *clnt; |
431 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
432 | loff_t origin = *offset; | 446 | loff_t origin = *offset; |
433 | unsigned long pg_start, pg_end; | 447 | unsigned long pg_start, pg_end; |
434 | 448 | ||
435 | P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, | 449 | P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data, |
436 | (int)count, (int)*offset); | 450 | (int)count, (int)*offset); |
437 | 451 | ||
438 | fid = filp->private_data; | ||
439 | clnt = fid->clnt; | 452 | clnt = fid->clnt; |
440 | |||
441 | retval = generic_write_checks(filp, &origin, &count, 0); | ||
442 | if (retval) | ||
443 | goto out; | ||
444 | |||
445 | retval = -EINVAL; | ||
446 | if ((ssize_t) count < 0) | ||
447 | goto out; | ||
448 | retval = 0; | ||
449 | if (!count) | ||
450 | goto out; | ||
451 | |||
452 | do { | 453 | do { |
453 | n = p9_client_write(fid, NULL, data+total, origin+total, count); | 454 | n = p9_client_write(fid, NULL, data+total, origin+total, count); |
454 | if (n <= 0) | 455 | if (n <= 0) |
@@ -457,25 +458,60 @@ v9fs_file_write(struct file *filp, const char __user * data, | |||
457 | total += n; | 458 | total += n; |
458 | } while (count > 0); | 459 | } while (count > 0); |
459 | 460 | ||
460 | if (total > 0) { | 461 | if (invalidate && (total > 0)) { |
461 | pg_start = origin >> PAGE_CACHE_SHIFT; | 462 | pg_start = origin >> PAGE_CACHE_SHIFT; |
462 | pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT; | 463 | pg_end = (origin + total - 1) >> PAGE_CACHE_SHIFT; |
463 | if (inode->i_mapping && inode->i_mapping->nrpages) | 464 | if (inode->i_mapping && inode->i_mapping->nrpages) |
464 | invalidate_inode_pages2_range(inode->i_mapping, | 465 | invalidate_inode_pages2_range(inode->i_mapping, |
465 | pg_start, pg_end); | 466 | pg_start, pg_end); |
466 | *offset += total; | 467 | *offset += total; |
467 | i_size_write(inode, i_size_read(inode) + total); | 468 | i_size = i_size_read(inode); |
468 | inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; | 469 | if (*offset > i_size) { |
470 | inode_add_bytes(inode, *offset - i_size); | ||
471 | i_size_write(inode, *offset); | ||
472 | } | ||
469 | } | 473 | } |
470 | |||
471 | if (n < 0) | 474 | if (n < 0) |
472 | retval = n; | 475 | return n; |
473 | else | 476 | |
474 | retval = total; | 477 | return total; |
478 | } | ||
479 | |||
480 | /** | ||
481 | * v9fs_file_write - write to a file | ||
482 | * @filp: file pointer to write | ||
483 | * @data: data buffer to write data from | ||
484 | * @count: size of buffer | ||
485 | * @offset: offset at which to write data | ||
486 | * | ||
487 | */ | ||
488 | static ssize_t | ||
489 | v9fs_file_write(struct file *filp, const char __user * data, | ||
490 | size_t count, loff_t *offset) | ||
491 | { | ||
492 | ssize_t retval = 0; | ||
493 | loff_t origin = *offset; | ||
494 | |||
495 | |||
496 | retval = generic_write_checks(filp, &origin, &count, 0); | ||
497 | if (retval) | ||
498 | goto out; | ||
499 | |||
500 | retval = -EINVAL; | ||
501 | if ((ssize_t) count < 0) | ||
502 | goto out; | ||
503 | retval = 0; | ||
504 | if (!count) | ||
505 | goto out; | ||
506 | |||
507 | return v9fs_file_write_internal(filp->f_path.dentry->d_inode, | ||
508 | filp->private_data, | ||
509 | data, count, offset, 1); | ||
475 | out: | 510 | out: |
476 | return retval; | 511 | return retval; |
477 | } | 512 | } |
478 | 513 | ||
514 | |||
479 | static int v9fs_file_fsync(struct file *filp, int datasync) | 515 | static int v9fs_file_fsync(struct file *filp, int datasync) |
480 | { | 516 | { |
481 | struct p9_fid *fid; | 517 | struct p9_fid *fid; |
@@ -505,28 +541,182 @@ int v9fs_file_fsync_dotl(struct file *filp, int datasync) | |||
505 | return retval; | 541 | return retval; |
506 | } | 542 | } |
507 | 543 | ||
508 | static const struct file_operations v9fs_cached_file_operations = { | 544 | static int |
545 | v9fs_file_mmap(struct file *file, struct vm_area_struct *vma) | ||
546 | { | ||
547 | int retval; | ||
548 | |||
549 | retval = generic_file_mmap(file, vma); | ||
550 | if (!retval) | ||
551 | vma->vm_ops = &v9fs_file_vm_ops; | ||
552 | |||
553 | return retval; | ||
554 | } | ||
555 | |||
556 | static int | ||
557 | v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
558 | { | ||
559 | struct v9fs_inode *v9inode; | ||
560 | struct page *page = vmf->page; | ||
561 | struct file *filp = vma->vm_file; | ||
562 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
563 | |||
564 | |||
565 | P9_DPRINTK(P9_DEBUG_VFS, "page %p fid %lx\n", | ||
566 | page, (unsigned long)filp->private_data); | ||
567 | |||
568 | v9inode = V9FS_I(inode); | ||
569 | /* make sure the cache has finished storing the page */ | ||
570 | v9fs_fscache_wait_on_page_write(inode, page); | ||
571 | BUG_ON(!v9inode->writeback_fid); | ||
572 | lock_page(page); | ||
573 | if (page->mapping != inode->i_mapping) | ||
574 | goto out_unlock; | ||
575 | |||
576 | return VM_FAULT_LOCKED; | ||
577 | out_unlock: | ||
578 | unlock_page(page); | ||
579 | return VM_FAULT_NOPAGE; | ||
580 | } | ||
581 | |||
582 | static ssize_t | ||
583 | v9fs_direct_read(struct file *filp, char __user *udata, size_t count, | ||
584 | loff_t *offsetp) | ||
585 | { | ||
586 | loff_t size, offset; | ||
587 | struct inode *inode; | ||
588 | struct address_space *mapping; | ||
589 | |||
590 | offset = *offsetp; | ||
591 | mapping = filp->f_mapping; | ||
592 | inode = mapping->host; | ||
593 | if (!count) | ||
594 | return 0; | ||
595 | size = i_size_read(inode); | ||
596 | if (offset < size) | ||
597 | filemap_write_and_wait_range(mapping, offset, | ||
598 | offset + count - 1); | ||
599 | |||
600 | return v9fs_file_read(filp, udata, count, offsetp); | ||
601 | } | ||
602 | |||
603 | /** | ||
604 | * v9fs_cached_file_read - read from a file | ||
605 | * @filp: file pointer to read | ||
606 | * @udata: user data buffer to read data into | ||
607 | * @count: size of buffer | ||
608 | * @offset: offset at which to read data | ||
609 | * | ||
610 | */ | ||
611 | static ssize_t | ||
612 | v9fs_cached_file_read(struct file *filp, char __user *data, size_t count, | ||
613 | loff_t *offset) | ||
614 | { | ||
615 | if (filp->f_flags & O_DIRECT) | ||
616 | return v9fs_direct_read(filp, data, count, offset); | ||
617 | return do_sync_read(filp, data, count, offset); | ||
618 | } | ||
619 | |||
620 | static ssize_t | ||
621 | v9fs_direct_write(struct file *filp, const char __user * data, | ||
622 | size_t count, loff_t *offsetp) | ||
623 | { | ||
624 | loff_t offset; | ||
625 | ssize_t retval; | ||
626 | struct inode *inode; | ||
627 | struct address_space *mapping; | ||
628 | |||
629 | offset = *offsetp; | ||
630 | mapping = filp->f_mapping; | ||
631 | inode = mapping->host; | ||
632 | if (!count) | ||
633 | return 0; | ||
634 | |||
635 | mutex_lock(&inode->i_mutex); | ||
636 | retval = filemap_write_and_wait_range(mapping, offset, | ||
637 | offset + count - 1); | ||
638 | if (retval) | ||
639 | goto err_out; | ||
640 | /* | ||
641 | * After a write we want buffered reads to be sure to go to disk to get | ||
642 | * the new data. We invalidate clean cached pages from the region we're | ||
643 | * about to write. We do this *before* the write so that if we fail | ||
644 | * here we can fall back to buffered write. | ||
645 | */ | ||
646 | if (mapping->nrpages) { | ||
647 | pgoff_t pg_start = offset >> PAGE_CACHE_SHIFT; | ||
648 | pgoff_t pg_end = (offset + count - 1) >> PAGE_CACHE_SHIFT; | ||
649 | |||
650 | retval = invalidate_inode_pages2_range(mapping, | ||
651 | pg_start, pg_end); | ||
652 | /* | ||
653 | * If a page cannot be invalidated, fall back | ||
654 | * to buffered write. | ||
655 | */ | ||
656 | if (retval) { | ||
657 | if (retval == -EBUSY) | ||
658 | goto buff_write; | ||
659 | goto err_out; | ||
660 | } | ||
661 | } | ||
662 | retval = v9fs_file_write(filp, data, count, offsetp); | ||
663 | err_out: | ||
664 | mutex_unlock(&inode->i_mutex); | ||
665 | return retval; | ||
666 | |||
667 | buff_write: | ||
668 | mutex_unlock(&inode->i_mutex); | ||
669 | return do_sync_write(filp, data, count, offsetp); | ||
670 | } | ||
671 | |||
672 | /** | ||
673 | * v9fs_cached_file_write - write to a file | ||
674 | * @filp: file pointer to write | ||
675 | * @data: data buffer to write data from | ||
676 | * @count: size of buffer | ||
677 | * @offset: offset at which to write data | ||
678 | * | ||
679 | */ | ||
680 | static ssize_t | ||
681 | v9fs_cached_file_write(struct file *filp, const char __user * data, | ||
682 | size_t count, loff_t *offset) | ||
683 | { | ||
684 | |||
685 | if (filp->f_flags & O_DIRECT) | ||
686 | return v9fs_direct_write(filp, data, count, offset); | ||
687 | return do_sync_write(filp, data, count, offset); | ||
688 | } | ||
689 | |||
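With the cached file operations installed, O_DIRECT opens are routed through v9fs_cached_file_read()/v9fs_cached_file_write() into the v9fs_direct_* helpers above, which flush and invalidate the page cache around the transfer. A user-space sketch of what triggers that path (the /mnt/9p mount point is hypothetical; the usual O_DIRECT alignment rules apply):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	void *buf;
	int fd = open("/mnt/9p/testfile", O_CREAT | O_RDWR | O_DIRECT, 0600);

	if (fd < 0)
		return 1;
	if (posix_memalign(&buf, 4096, 4096))
		return 1;
	memset(buf, 'x', 4096);
	if (write(fd, buf, 4096) != 4096)	/* goes via v9fs_direct_write() */
		return 1;
	free(buf);
	close(fd);
	return 0;
}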
690 | static const struct vm_operations_struct v9fs_file_vm_ops = { | ||
691 | .fault = filemap_fault, | ||
692 | .page_mkwrite = v9fs_vm_page_mkwrite, | ||
693 | }; | ||
694 | |||
695 | |||
696 | const struct file_operations v9fs_cached_file_operations = { | ||
509 | .llseek = generic_file_llseek, | 697 | .llseek = generic_file_llseek, |
510 | .read = do_sync_read, | 698 | .read = v9fs_cached_file_read, |
699 | .write = v9fs_cached_file_write, | ||
511 | .aio_read = generic_file_aio_read, | 700 | .aio_read = generic_file_aio_read, |
512 | .write = v9fs_file_write, | 701 | .aio_write = generic_file_aio_write, |
513 | .open = v9fs_file_open, | 702 | .open = v9fs_file_open, |
514 | .release = v9fs_dir_release, | 703 | .release = v9fs_dir_release, |
515 | .lock = v9fs_file_lock, | 704 | .lock = v9fs_file_lock, |
516 | .mmap = generic_file_readonly_mmap, | 705 | .mmap = v9fs_file_mmap, |
517 | .fsync = v9fs_file_fsync, | 706 | .fsync = v9fs_file_fsync, |
518 | }; | 707 | }; |
519 | 708 | ||
520 | static const struct file_operations v9fs_cached_file_operations_dotl = { | 709 | const struct file_operations v9fs_cached_file_operations_dotl = { |
521 | .llseek = generic_file_llseek, | 710 | .llseek = generic_file_llseek, |
522 | .read = do_sync_read, | 711 | .read = v9fs_cached_file_read, |
712 | .write = v9fs_cached_file_write, | ||
523 | .aio_read = generic_file_aio_read, | 713 | .aio_read = generic_file_aio_read, |
524 | .write = v9fs_file_write, | 714 | .aio_write = generic_file_aio_write, |
525 | .open = v9fs_file_open, | 715 | .open = v9fs_file_open, |
526 | .release = v9fs_dir_release, | 716 | .release = v9fs_dir_release, |
527 | .lock = v9fs_file_lock_dotl, | 717 | .lock = v9fs_file_lock_dotl, |
528 | .flock = v9fs_file_flock_dotl, | 718 | .flock = v9fs_file_flock_dotl, |
529 | .mmap = generic_file_readonly_mmap, | 719 | .mmap = v9fs_file_mmap, |
530 | .fsync = v9fs_file_fsync_dotl, | 720 | .fsync = v9fs_file_fsync_dotl, |
531 | }; | 721 | }; |
532 | 722 | ||
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index b76a40bdf4c..8a2c232f708 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -203,26 +203,25 @@ v9fs_blank_wstat(struct p9_wstat *wstat) | |||
203 | wstat->extension = NULL; | 203 | wstat->extension = NULL; |
204 | } | 204 | } |
205 | 205 | ||
206 | #ifdef CONFIG_9P_FSCACHE | ||
207 | /** | 206 | /** |
208 | * v9fs_alloc_inode - helper function to allocate an inode | 207 | * v9fs_alloc_inode - helper function to allocate an inode |
209 | * This callback is executed before setting up the inode so that we | ||
210 | * can associate a vcookie with each inode. | ||
211 | * | 208 | * |
212 | */ | 209 | */ |
213 | |||
214 | struct inode *v9fs_alloc_inode(struct super_block *sb) | 210 | struct inode *v9fs_alloc_inode(struct super_block *sb) |
215 | { | 211 | { |
216 | struct v9fs_cookie *vcookie; | 212 | struct v9fs_inode *v9inode; |
217 | vcookie = (struct v9fs_cookie *)kmem_cache_alloc(vcookie_cache, | 213 | v9inode = (struct v9fs_inode *)kmem_cache_alloc(v9fs_inode_cache, |
218 | GFP_KERNEL); | 214 | GFP_KERNEL); |
219 | if (!vcookie) | 215 | if (!v9inode) |
220 | return NULL; | 216 | return NULL; |
221 | 217 | #ifdef CONFIG_9P_FSCACHE | |
222 | vcookie->fscache = NULL; | 218 | v9inode->fscache = NULL; |
223 | vcookie->qid = NULL; | 219 | v9inode->fscache_key = NULL; |
224 | spin_lock_init(&vcookie->lock); | 220 | spin_lock_init(&v9inode->fscache_lock); |
225 | return &vcookie->inode; | 221 | #endif |
222 | v9inode->writeback_fid = NULL; | ||
223 | v9inode->cache_validity = 0; | ||
224 | return &v9inode->vfs_inode; | ||
226 | } | 225 | } |
227 | 226 | ||
228 | /** | 227 | /** |
@@ -234,35 +233,18 @@ static void v9fs_i_callback(struct rcu_head *head) | |||
234 | { | 233 | { |
235 | struct inode *inode = container_of(head, struct inode, i_rcu); | 234 | struct inode *inode = container_of(head, struct inode, i_rcu); |
236 | INIT_LIST_HEAD(&inode->i_dentry); | 235 | INIT_LIST_HEAD(&inode->i_dentry); |
237 | kmem_cache_free(vcookie_cache, v9fs_inode2cookie(inode)); | 236 | kmem_cache_free(v9fs_inode_cache, V9FS_I(inode)); |
238 | } | 237 | } |
239 | 238 | ||
240 | void v9fs_destroy_inode(struct inode *inode) | 239 | void v9fs_destroy_inode(struct inode *inode) |
241 | { | 240 | { |
242 | call_rcu(&inode->i_rcu, v9fs_i_callback); | 241 | call_rcu(&inode->i_rcu, v9fs_i_callback); |
243 | } | 242 | } |
244 | #endif | ||
245 | 243 | ||
246 | /** | 244 | int v9fs_init_inode(struct v9fs_session_info *v9ses, |
247 | * v9fs_get_inode - helper function to setup an inode | 245 | struct inode *inode, int mode) |
248 | * @sb: superblock | ||
249 | * @mode: mode to setup inode with | ||
250 | * | ||
251 | */ | ||
252 | |||
253 | struct inode *v9fs_get_inode(struct super_block *sb, int mode) | ||
254 | { | 246 | { |
255 | int err; | 247 | int err = 0; |
256 | struct inode *inode; | ||
257 | struct v9fs_session_info *v9ses = sb->s_fs_info; | ||
258 | |||
259 | P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); | ||
260 | |||
261 | inode = new_inode(sb); | ||
262 | if (!inode) { | ||
263 | P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); | ||
264 | return ERR_PTR(-ENOMEM); | ||
265 | } | ||
266 | 248 | ||
267 | inode_init_owner(inode, NULL, mode); | 249 | inode_init_owner(inode, NULL, mode); |
268 | inode->i_blocks = 0; | 250 | inode->i_blocks = 0; |
@@ -292,14 +274,20 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) | |||
292 | case S_IFREG: | 274 | case S_IFREG: |
293 | if (v9fs_proto_dotl(v9ses)) { | 275 | if (v9fs_proto_dotl(v9ses)) { |
294 | inode->i_op = &v9fs_file_inode_operations_dotl; | 276 | inode->i_op = &v9fs_file_inode_operations_dotl; |
295 | inode->i_fop = &v9fs_file_operations_dotl; | 277 | if (v9ses->cache) |
278 | inode->i_fop = | ||
279 | &v9fs_cached_file_operations_dotl; | ||
280 | else | ||
281 | inode->i_fop = &v9fs_file_operations_dotl; | ||
296 | } else { | 282 | } else { |
297 | inode->i_op = &v9fs_file_inode_operations; | 283 | inode->i_op = &v9fs_file_inode_operations; |
298 | inode->i_fop = &v9fs_file_operations; | 284 | if (v9ses->cache) |
285 | inode->i_fop = &v9fs_cached_file_operations; | ||
286 | else | ||
287 | inode->i_fop = &v9fs_file_operations; | ||
299 | } | 288 | } |
300 | 289 | ||
301 | break; | 290 | break; |
302 | |||
303 | case S_IFLNK: | 291 | case S_IFLNK: |
304 | if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { | 292 | if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses)) { |
305 | P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with " | 293 | P9_DPRINTK(P9_DEBUG_ERROR, "extended modes used with " |
@@ -335,12 +323,37 @@ struct inode *v9fs_get_inode(struct super_block *sb, int mode) | |||
335 | err = -EINVAL; | 323 | err = -EINVAL; |
336 | goto error; | 324 | goto error; |
337 | } | 325 | } |
326 | error: | ||
327 | return err; | ||
338 | 328 | ||
339 | return inode; | 329 | } |
340 | 330 | ||
341 | error: | 331 | /** |
342 | iput(inode); | 332 | * v9fs_get_inode - helper function to setup an inode |
343 | return ERR_PTR(err); | 333 | * @sb: superblock |
334 | * @mode: mode to setup inode with | ||
335 | * | ||
336 | */ | ||
337 | |||
338 | struct inode *v9fs_get_inode(struct super_block *sb, int mode) | ||
339 | { | ||
340 | int err; | ||
341 | struct inode *inode; | ||
342 | struct v9fs_session_info *v9ses = sb->s_fs_info; | ||
343 | |||
344 | P9_DPRINTK(P9_DEBUG_VFS, "super block: %p mode: %o\n", sb, mode); | ||
345 | |||
346 | inode = new_inode(sb); | ||
347 | if (!inode) { | ||
348 | P9_EPRINTK(KERN_WARNING, "Problem allocating inode\n"); | ||
349 | return ERR_PTR(-ENOMEM); | ||
350 | } | ||
351 | err = v9fs_init_inode(v9ses, inode, mode); | ||
352 | if (err) { | ||
353 | iput(inode); | ||
354 | return ERR_PTR(err); | ||
355 | } | ||
356 | return inode; | ||
344 | } | 357 | } |
345 | 358 | ||
346 | /* | 359 | /* |
@@ -403,6 +416,8 @@ error: | |||
403 | */ | 416 | */ |
404 | void v9fs_evict_inode(struct inode *inode) | 417 | void v9fs_evict_inode(struct inode *inode) |
405 | { | 418 | { |
419 | struct v9fs_inode *v9inode = V9FS_I(inode); | ||
420 | |||
406 | truncate_inode_pages(inode->i_mapping, 0); | 421 | truncate_inode_pages(inode->i_mapping, 0); |
407 | end_writeback(inode); | 422 | end_writeback(inode); |
408 | filemap_fdatawrite(inode->i_mapping); | 423 | filemap_fdatawrite(inode->i_mapping); |
@@ -410,41 +425,67 @@ void v9fs_evict_inode(struct inode *inode) | |||
410 | #ifdef CONFIG_9P_FSCACHE | 425 | #ifdef CONFIG_9P_FSCACHE |
411 | v9fs_cache_inode_put_cookie(inode); | 426 | v9fs_cache_inode_put_cookie(inode); |
412 | #endif | 427 | #endif |
428 | /* clunk the fid stashed in writeback_fid */ | ||
429 | if (v9inode->writeback_fid) { | ||
430 | p9_client_clunk(v9inode->writeback_fid); | ||
431 | v9inode->writeback_fid = NULL; | ||
432 | } | ||
413 | } | 433 | } |
414 | 434 | ||
415 | struct inode * | 435 | static struct inode *v9fs_qid_iget(struct super_block *sb, |
416 | v9fs_inode(struct v9fs_session_info *v9ses, struct p9_fid *fid, | 436 | struct p9_qid *qid, |
417 | struct super_block *sb) | 437 | struct p9_wstat *st) |
418 | { | 438 | { |
419 | int err, umode; | 439 | int retval, umode; |
420 | struct inode *ret = NULL; | 440 | unsigned long i_ino; |
421 | struct p9_wstat *st; | 441 | struct inode *inode; |
422 | 442 | struct v9fs_session_info *v9ses = sb->s_fs_info; | |
423 | st = p9_client_stat(fid); | ||
424 | if (IS_ERR(st)) | ||
425 | return ERR_CAST(st); | ||
426 | 443 | ||
444 | i_ino = v9fs_qid2ino(qid); | ||
445 | inode = iget_locked(sb, i_ino); | ||
446 | if (!inode) | ||
447 | return ERR_PTR(-ENOMEM); | ||
448 | if (!(inode->i_state & I_NEW)) | ||
449 | return inode; | ||
450 | /* | ||
451 | * initialize the inode with the stat info | ||
452 | * FIXME!! we may need support for stale inodes | ||
453 | * later. | ||
454 | */ | ||
427 | umode = p9mode2unixmode(v9ses, st->mode); | 455 | umode = p9mode2unixmode(v9ses, st->mode); |
428 | ret = v9fs_get_inode(sb, umode); | 456 | retval = v9fs_init_inode(v9ses, inode, umode); |
429 | if (IS_ERR(ret)) { | 457 | if (retval) |
430 | err = PTR_ERR(ret); | ||
431 | goto error; | 458 | goto error; |
432 | } | ||
433 | |||
434 | v9fs_stat2inode(st, ret, sb); | ||
435 | ret->i_ino = v9fs_qid2ino(&st->qid); | ||
436 | 459 | ||
460 | v9fs_stat2inode(st, inode, sb); | ||
437 | #ifdef CONFIG_9P_FSCACHE | 461 | #ifdef CONFIG_9P_FSCACHE |
438 | v9fs_vcookie_set_qid(ret, &st->qid); | 462 | v9fs_fscache_set_key(inode, &st->qid); |
439 | v9fs_cache_inode_get_cookie(ret); | 463 | v9fs_cache_inode_get_cookie(inode); |
440 | #endif | 464 | #endif |
441 | p9stat_free(st); | 465 | unlock_new_inode(inode); |
442 | kfree(st); | 466 | return inode; |
443 | return ret; | ||
444 | error: | 467 | error: |
468 | unlock_new_inode(inode); | ||
469 | iput(inode); | ||
470 | return ERR_PTR(retval); | ||
471 | |||
472 | } | ||
473 | |||
474 | struct inode * | ||
475 | v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, | ||
476 | struct super_block *sb) | ||
477 | { | ||
478 | struct p9_wstat *st; | ||
479 | struct inode *inode = NULL; | ||
480 | |||
481 | st = p9_client_stat(fid); | ||
482 | if (IS_ERR(st)) | ||
483 | return ERR_CAST(st); | ||
484 | |||
485 | inode = v9fs_qid_iget(sb, &st->qid, st); | ||
445 | p9stat_free(st); | 486 | p9stat_free(st); |
446 | kfree(st); | 487 | kfree(st); |
447 | return ERR_PTR(err); | 488 | return inode; |
448 | } | 489 | } |
449 | 490 | ||
450 | /** | 491 | /** |
@@ -458,8 +499,8 @@ error: | |||
458 | static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) | 499 | static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) |
459 | { | 500 | { |
460 | int retval; | 501 | int retval; |
461 | struct inode *file_inode; | ||
462 | struct p9_fid *v9fid; | 502 | struct p9_fid *v9fid; |
503 | struct inode *file_inode; | ||
463 | 504 | ||
464 | P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, | 505 | P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file, |
465 | rmdir); | 506 | rmdir); |
@@ -470,8 +511,20 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) | |||
470 | return PTR_ERR(v9fid); | 511 | return PTR_ERR(v9fid); |
471 | 512 | ||
472 | retval = p9_client_remove(v9fid); | 513 | retval = p9_client_remove(v9fid); |
473 | if (!retval) | 514 | if (!retval) { |
474 | drop_nlink(file_inode); | 515 | /* |
516 | * directories on unlink should have zero | ||
517 | * link count | ||
518 | */ | ||
519 | if (rmdir) { | ||
520 | clear_nlink(file_inode); | ||
521 | drop_nlink(dir); | ||
522 | } else | ||
523 | drop_nlink(file_inode); | ||
524 | |||
525 | v9fs_invalidate_inode_attr(file_inode); | ||
526 | v9fs_invalidate_inode_attr(dir); | ||
527 | } | ||
475 | return retval; | 528 | return retval; |
476 | } | 529 | } |
477 | 530 | ||
@@ -531,7 +584,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir, | |||
531 | } | 584 | } |
532 | 585 | ||
533 | /* instantiate inode and assign the unopened fid to the dentry */ | 586 | /* instantiate inode and assign the unopened fid to the dentry */ |
534 | inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); | 587 | inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); |
535 | if (IS_ERR(inode)) { | 588 | if (IS_ERR(inode)) { |
536 | err = PTR_ERR(inode); | 589 | err = PTR_ERR(inode); |
537 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); | 590 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); |
@@ -570,9 +623,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
570 | int err; | 623 | int err; |
571 | u32 perm; | 624 | u32 perm; |
572 | int flags; | 625 | int flags; |
573 | struct v9fs_session_info *v9ses; | ||
574 | struct p9_fid *fid; | ||
575 | struct file *filp; | 626 | struct file *filp; |
627 | struct v9fs_inode *v9inode; | ||
628 | struct v9fs_session_info *v9ses; | ||
629 | struct p9_fid *fid, *inode_fid; | ||
576 | 630 | ||
577 | err = 0; | 631 | err = 0; |
578 | fid = NULL; | 632 | fid = NULL; |
@@ -592,8 +646,25 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
592 | goto error; | 646 | goto error; |
593 | } | 647 | } |
594 | 648 | ||
649 | v9fs_invalidate_inode_attr(dir); | ||
595 | /* if we are opening a file, assign the open fid to the file */ | 650 | /* if we are opening a file, assign the open fid to the file */ |
596 | if (nd && nd->flags & LOOKUP_OPEN) { | 651 | if (nd && nd->flags & LOOKUP_OPEN) { |
652 | v9inode = V9FS_I(dentry->d_inode); | ||
653 | if (v9ses->cache && !v9inode->writeback_fid) { | ||
654 | /* | ||
655 | * clone a fid and add it to writeback_fid | ||
656 | * we do it during open time instead of | ||
657 | * page dirty time via write_begin/page_mkwrite | ||
658 | * because we want the write-after-unlink use case | ||
659 | * to work. | ||
660 | */ | ||
661 | inode_fid = v9fs_writeback_fid(dentry); | ||
662 | if (IS_ERR(inode_fid)) { | ||
663 | err = PTR_ERR(inode_fid); | ||
664 | goto error; | ||
665 | } | ||
666 | v9inode->writeback_fid = (void *) inode_fid; | ||
667 | } | ||
597 | filp = lookup_instantiate_filp(nd, dentry, generic_file_open); | 668 | filp = lookup_instantiate_filp(nd, dentry, generic_file_open); |
598 | if (IS_ERR(filp)) { | 669 | if (IS_ERR(filp)) { |
599 | err = PTR_ERR(filp); | 670 | err = PTR_ERR(filp); |
@@ -601,6 +672,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
601 | } | 672 | } |
602 | 673 | ||
603 | filp->private_data = fid; | 674 | filp->private_data = fid; |
675 | #ifdef CONFIG_9P_FSCACHE | ||
676 | if (v9ses->cache) | ||
677 | v9fs_cache_inode_set_cookie(dentry->d_inode, filp); | ||
678 | #endif | ||
604 | } else | 679 | } else |
605 | p9_client_clunk(fid); | 680 | p9_client_clunk(fid); |
606 | 681 | ||
@@ -625,8 +700,8 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
625 | { | 700 | { |
626 | int err; | 701 | int err; |
627 | u32 perm; | 702 | u32 perm; |
628 | struct v9fs_session_info *v9ses; | ||
629 | struct p9_fid *fid; | 703 | struct p9_fid *fid; |
704 | struct v9fs_session_info *v9ses; | ||
630 | 705 | ||
631 | P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); | 706 | P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name); |
632 | err = 0; | 707 | err = 0; |
@@ -636,6 +711,9 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
636 | if (IS_ERR(fid)) { | 711 | if (IS_ERR(fid)) { |
637 | err = PTR_ERR(fid); | 712 | err = PTR_ERR(fid); |
638 | fid = NULL; | 713 | fid = NULL; |
714 | } else { | ||
715 | inc_nlink(dir); | ||
716 | v9fs_invalidate_inode_attr(dir); | ||
639 | } | 717 | } |
640 | 718 | ||
641 | if (fid) | 719 | if (fid) |
@@ -687,7 +765,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | |||
687 | return ERR_PTR(result); | 765 | return ERR_PTR(result); |
688 | } | 766 | } |
689 | 767 | ||
690 | inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); | 768 | inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); |
691 | if (IS_ERR(inode)) { | 769 | if (IS_ERR(inode)) { |
692 | result = PTR_ERR(inode); | 770 | result = PTR_ERR(inode); |
693 | inode = NULL; | 771 | inode = NULL; |
@@ -747,17 +825,19 @@ int | |||
747 | v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | 825 | v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
748 | struct inode *new_dir, struct dentry *new_dentry) | 826 | struct inode *new_dir, struct dentry *new_dentry) |
749 | { | 827 | { |
828 | int retval; | ||
750 | struct inode *old_inode; | 829 | struct inode *old_inode; |
830 | struct inode *new_inode; | ||
751 | struct v9fs_session_info *v9ses; | 831 | struct v9fs_session_info *v9ses; |
752 | struct p9_fid *oldfid; | 832 | struct p9_fid *oldfid; |
753 | struct p9_fid *olddirfid; | 833 | struct p9_fid *olddirfid; |
754 | struct p9_fid *newdirfid; | 834 | struct p9_fid *newdirfid; |
755 | struct p9_wstat wstat; | 835 | struct p9_wstat wstat; |
756 | int retval; | ||
757 | 836 | ||
758 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); | 837 | P9_DPRINTK(P9_DEBUG_VFS, "\n"); |
759 | retval = 0; | 838 | retval = 0; |
760 | old_inode = old_dentry->d_inode; | 839 | old_inode = old_dentry->d_inode; |
840 | new_inode = new_dentry->d_inode; | ||
761 | v9ses = v9fs_inode2v9ses(old_inode); | 841 | v9ses = v9fs_inode2v9ses(old_inode); |
762 | oldfid = v9fs_fid_lookup(old_dentry); | 842 | oldfid = v9fs_fid_lookup(old_dentry); |
763 | if (IS_ERR(oldfid)) | 843 | if (IS_ERR(oldfid)) |
@@ -798,9 +878,30 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
798 | retval = p9_client_wstat(oldfid, &wstat); | 878 | retval = p9_client_wstat(oldfid, &wstat); |
799 | 879 | ||
800 | clunk_newdir: | 880 | clunk_newdir: |
801 | if (!retval) | 881 | if (!retval) { |
882 | if (new_inode) { | ||
883 | if (S_ISDIR(new_inode->i_mode)) | ||
884 | clear_nlink(new_inode); | ||
885 | else | ||
886 | drop_nlink(new_inode); | ||
887 | /* | ||
888 | * Work around vfs rename rehash bug with | ||
889 | * FS_RENAME_DOES_D_MOVE | ||
890 | */ | ||
891 | v9fs_invalidate_inode_attr(new_inode); | ||
892 | } | ||
893 | if (S_ISDIR(old_inode->i_mode)) { | ||
894 | if (!new_inode) | ||
895 | inc_nlink(new_dir); | ||
896 | drop_nlink(old_dir); | ||
897 | } | ||
898 | v9fs_invalidate_inode_attr(old_inode); | ||
899 | v9fs_invalidate_inode_attr(old_dir); | ||
900 | v9fs_invalidate_inode_attr(new_dir); | ||
901 | |||
802 | /* successful rename */ | 902 | /* successful rename */ |
803 | d_move(old_dentry, new_dentry); | 903 | d_move(old_dentry, new_dentry); |
904 | } | ||
804 | up_write(&v9ses->rename_sem); | 905 | up_write(&v9ses->rename_sem); |
805 | p9_client_clunk(newdirfid); | 906 | p9_client_clunk(newdirfid); |
806 | 907 | ||
@@ -831,9 +932,10 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
831 | P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); | 932 | P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); |
832 | err = -EPERM; | 933 | err = -EPERM; |
833 | v9ses = v9fs_inode2v9ses(dentry->d_inode); | 934 | v9ses = v9fs_inode2v9ses(dentry->d_inode); |
834 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) | 935 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { |
835 | return simple_getattr(mnt, dentry, stat); | 936 | generic_fillattr(dentry->d_inode, stat); |
836 | 937 | return 0; | |
938 | } | ||
837 | fid = v9fs_fid_lookup(dentry); | 939 | fid = v9fs_fid_lookup(dentry); |
838 | if (IS_ERR(fid)) | 940 | if (IS_ERR(fid)) |
839 | return PTR_ERR(fid); | 941 | return PTR_ERR(fid); |
@@ -891,17 +993,20 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr) | |||
891 | if (iattr->ia_valid & ATTR_GID) | 993 | if (iattr->ia_valid & ATTR_GID) |
892 | wstat.n_gid = iattr->ia_gid; | 994 | wstat.n_gid = iattr->ia_gid; |
893 | } | 995 | } |
894 | |||
895 | retval = p9_client_wstat(fid, &wstat); | ||
896 | if (retval < 0) | ||
897 | return retval; | ||
898 | |||
899 | if ((iattr->ia_valid & ATTR_SIZE) && | 996 | if ((iattr->ia_valid & ATTR_SIZE) && |
900 | iattr->ia_size != i_size_read(dentry->d_inode)) { | 997 | iattr->ia_size != i_size_read(dentry->d_inode)) { |
901 | retval = vmtruncate(dentry->d_inode, iattr->ia_size); | 998 | retval = vmtruncate(dentry->d_inode, iattr->ia_size); |
902 | if (retval) | 999 | if (retval) |
903 | return retval; | 1000 | return retval; |
904 | } | 1001 | } |
1002 | /* Write all dirty data */ | ||
1003 | if (S_ISREG(dentry->d_inode->i_mode)) | ||
1004 | filemap_write_and_wait(dentry->d_inode->i_mapping); | ||
1005 | |||
1006 | retval = p9_client_wstat(fid, &wstat); | ||
1007 | if (retval < 0) | ||
1008 | return retval; | ||
1009 | v9fs_invalidate_inode_attr(dentry->d_inode); | ||
905 | 1010 | ||
906 | setattr_copy(dentry->d_inode, iattr); | 1011 | setattr_copy(dentry->d_inode, iattr); |
907 | mark_inode_dirty(dentry->d_inode); | 1012 | mark_inode_dirty(dentry->d_inode); |
@@ -924,6 +1029,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, | |||
924 | char tag_name[14]; | 1029 | char tag_name[14]; |
925 | unsigned int i_nlink; | 1030 | unsigned int i_nlink; |
926 | struct v9fs_session_info *v9ses = sb->s_fs_info; | 1031 | struct v9fs_session_info *v9ses = sb->s_fs_info; |
1032 | struct v9fs_inode *v9inode = V9FS_I(inode); | ||
927 | 1033 | ||
928 | inode->i_nlink = 1; | 1034 | inode->i_nlink = 1; |
929 | 1035 | ||
@@ -983,6 +1089,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, | |||
983 | 1089 | ||
984 | /* not real number of blocks, but 512 byte ones ... */ | 1090 | /* not real number of blocks, but 512 byte ones ... */ |
985 | inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; | 1091 | inode->i_blocks = (i_size_read(inode) + 512 - 1) >> 9; |
1092 | v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; | ||
986 | } | 1093 | } |
987 | 1094 | ||
988 | /** | 1095 | /** |
@@ -1115,8 +1222,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, | |||
1115 | int mode, const char *extension) | 1222 | int mode, const char *extension) |
1116 | { | 1223 | { |
1117 | u32 perm; | 1224 | u32 perm; |
1118 | struct v9fs_session_info *v9ses; | ||
1119 | struct p9_fid *fid; | 1225 | struct p9_fid *fid; |
1226 | struct v9fs_session_info *v9ses; | ||
1120 | 1227 | ||
1121 | v9ses = v9fs_inode2v9ses(dir); | 1228 | v9ses = v9fs_inode2v9ses(dir); |
1122 | if (!v9fs_proto_dotu(v9ses)) { | 1229 | if (!v9fs_proto_dotu(v9ses)) { |
@@ -1130,6 +1237,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry, | |||
1130 | if (IS_ERR(fid)) | 1237 | if (IS_ERR(fid)) |
1131 | return PTR_ERR(fid); | 1238 | return PTR_ERR(fid); |
1132 | 1239 | ||
1240 | v9fs_invalidate_inode_attr(dir); | ||
1133 | p9_client_clunk(fid); | 1241 | p9_client_clunk(fid); |
1134 | return 0; | 1242 | return 0; |
1135 | } | 1243 | } |
@@ -1166,8 +1274,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, | |||
1166 | struct dentry *dentry) | 1274 | struct dentry *dentry) |
1167 | { | 1275 | { |
1168 | int retval; | 1276 | int retval; |
1169 | struct p9_fid *oldfid; | ||
1170 | char *name; | 1277 | char *name; |
1278 | struct p9_fid *oldfid; | ||
1171 | 1279 | ||
1172 | P9_DPRINTK(P9_DEBUG_VFS, | 1280 | P9_DPRINTK(P9_DEBUG_VFS, |
1173 | " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, | 1281 | " %lu,%s,%s\n", dir->i_ino, dentry->d_name.name, |
@@ -1186,7 +1294,10 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, | |||
1186 | sprintf(name, "%d\n", oldfid->fid); | 1294 | sprintf(name, "%d\n", oldfid->fid); |
1187 | retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name); | 1295 | retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name); |
1188 | __putname(name); | 1296 | __putname(name); |
1189 | 1297 | if (!retval) { | |
1298 | v9fs_refresh_inode(oldfid, old_dentry->d_inode); | ||
1299 | v9fs_invalidate_inode_attr(dir); | ||
1300 | } | ||
1190 | clunk_fid: | 1301 | clunk_fid: |
1191 | p9_client_clunk(oldfid); | 1302 | p9_client_clunk(oldfid); |
1192 | return retval; | 1303 | return retval; |
@@ -1237,6 +1348,32 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) | |||
1237 | return retval; | 1348 | return retval; |
1238 | } | 1349 | } |
1239 | 1350 | ||
1351 | int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode) | ||
1352 | { | ||
1353 | loff_t i_size; | ||
1354 | struct p9_wstat *st; | ||
1355 | struct v9fs_session_info *v9ses; | ||
1356 | |||
1357 | v9ses = v9fs_inode2v9ses(inode); | ||
1358 | st = p9_client_stat(fid); | ||
1359 | if (IS_ERR(st)) | ||
1360 | return PTR_ERR(st); | ||
1361 | |||
1362 | spin_lock(&inode->i_lock); | ||
1363 | /* | ||
1364 | * We don't want to refresh inode->i_size, | ||
1365 | * because we may have cached data | ||
1366 | */ | ||
1367 | i_size = inode->i_size; | ||
1368 | v9fs_stat2inode(st, inode, inode->i_sb); | ||
1369 | if (v9ses->cache) | ||
1370 | inode->i_size = i_size; | ||
1371 | spin_unlock(&inode->i_lock); | ||
1372 | p9stat_free(st); | ||
1373 | kfree(st); | ||
1374 | return 0; | ||
1375 | } | ||
1376 | |||
1240 | static const struct inode_operations v9fs_dir_inode_operations_dotu = { | 1377 | static const struct inode_operations v9fs_dir_inode_operations_dotu = { |
1241 | .create = v9fs_vfs_create, | 1378 | .create = v9fs_vfs_create, |
1242 | .lookup = v9fs_vfs_lookup, | 1379 | .lookup = v9fs_vfs_lookup, |
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index fe3ffa9aace..67c138e94fe 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c | |||
@@ -86,40 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) | |||
86 | return dentry; | 86 | return dentry; |
87 | } | 87 | } |
88 | 88 | ||
89 | static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, | ||
90 | struct p9_qid *qid, | ||
91 | struct p9_fid *fid, | ||
92 | struct p9_stat_dotl *st) | ||
93 | { | ||
94 | int retval; | ||
95 | unsigned long i_ino; | ||
96 | struct inode *inode; | ||
97 | struct v9fs_session_info *v9ses = sb->s_fs_info; | ||
98 | |||
99 | i_ino = v9fs_qid2ino(qid); | ||
100 | inode = iget_locked(sb, i_ino); | ||
101 | if (!inode) | ||
102 | return ERR_PTR(-ENOMEM); | ||
103 | if (!(inode->i_state & I_NEW)) | ||
104 | return inode; | ||
105 | /* | ||
106 | * initialize the inode with the stat info | ||
107 | * FIXME!! we may need support for stale inodes | ||
108 | * later. | ||
109 | */ | ||
110 | retval = v9fs_init_inode(v9ses, inode, st->st_mode); | ||
111 | if (retval) | ||
112 | goto error; | ||
113 | |||
114 | v9fs_stat2inode_dotl(st, inode); | ||
115 | #ifdef CONFIG_9P_FSCACHE | ||
116 | v9fs_fscache_set_key(inode, &st->qid); | ||
117 | v9fs_cache_inode_get_cookie(inode); | ||
118 | #endif | ||
119 | retval = v9fs_get_acl(inode, fid); | ||
120 | if (retval) | ||
121 | goto error; | ||
122 | |||
123 | unlock_new_inode(inode); | ||
124 | return inode; | ||
125 | error: | ||
126 | unlock_new_inode(inode); | ||
127 | iput(inode); | ||
128 | return ERR_PTR(retval); | ||
129 | |||
130 | } | ||
131 | |||
89 | struct inode * | 132 | struct inode * |
90 | v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, | 133 | v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, |
91 | struct super_block *sb) | 134 | struct super_block *sb) |
92 | { | 135 | { |
93 | struct inode *ret = NULL; | ||
94 | int err; | ||
95 | struct p9_stat_dotl *st; | 136 | struct p9_stat_dotl *st; |
137 | struct inode *inode = NULL; | ||
96 | 138 | ||
97 | st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); | 139 | st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); |
98 | if (IS_ERR(st)) | 140 | if (IS_ERR(st)) |
99 | return ERR_CAST(st); | 141 | return ERR_CAST(st); |
100 | 142 | ||
101 | ret = v9fs_get_inode(sb, st->st_mode); | 143 | inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st); |
102 | if (IS_ERR(ret)) { | ||
103 | err = PTR_ERR(ret); | ||
104 | goto error; | ||
105 | } | ||
106 | |||
107 | v9fs_stat2inode_dotl(st, ret); | ||
108 | ret->i_ino = v9fs_qid2ino(&st->qid); | ||
109 | #ifdef CONFIG_9P_FSCACHE | ||
110 | v9fs_vcookie_set_qid(ret, &st->qid); | ||
111 | v9fs_cache_inode_get_cookie(ret); | ||
112 | #endif | ||
113 | err = v9fs_get_acl(ret, fid); | ||
114 | if (err) { | ||
115 | iput(ret); | ||
116 | goto error; | ||
117 | } | ||
118 | kfree(st); | ||
119 | return ret; | ||
120 | error: | ||
121 | kfree(st); | 144 | kfree(st); |
122 | return ERR_PTR(err); | 145 | return inode; |
123 | } | 146 | } |
124 | 147 | ||
125 | /** | 148 | /** |
@@ -136,16 +159,17 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, | |||
136 | struct nameidata *nd) | 159 | struct nameidata *nd) |
137 | { | 160 | { |
138 | int err = 0; | 161 | int err = 0; |
139 | char *name = NULL; | ||
140 | gid_t gid; | 162 | gid_t gid; |
141 | int flags; | 163 | int flags; |
142 | mode_t mode; | 164 | mode_t mode; |
143 | struct v9fs_session_info *v9ses; | 165 | char *name = NULL; |
144 | struct p9_fid *fid = NULL; | ||
145 | struct p9_fid *dfid, *ofid; | ||
146 | struct file *filp; | 166 | struct file *filp; |
147 | struct p9_qid qid; | 167 | struct p9_qid qid; |
148 | struct inode *inode; | 168 | struct inode *inode; |
169 | struct p9_fid *fid = NULL; | ||
170 | struct v9fs_inode *v9inode; | ||
171 | struct p9_fid *dfid, *ofid, *inode_fid; | ||
172 | struct v9fs_session_info *v9ses; | ||
149 | struct posix_acl *pacl = NULL, *dacl = NULL; | 173 | struct posix_acl *pacl = NULL, *dacl = NULL; |
150 | 174 | ||
151 | v9ses = v9fs_inode2v9ses(dir); | 175 | v9ses = v9fs_inode2v9ses(dir); |
@@ -196,6 +220,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, | |||
196 | err); | 220 | err); |
197 | goto error; | 221 | goto error; |
198 | } | 222 | } |
223 | v9fs_invalidate_inode_attr(dir); | ||
199 | 224 | ||
200 | /* instantiate inode and assign the unopened fid to the dentry */ | 225 | /* instantiate inode and assign the unopened fid to the dentry */ |
201 | fid = p9_client_walk(dfid, 1, &name, 1); | 226 | fid = p9_client_walk(dfid, 1, &name, 1); |
@@ -205,7 +230,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, | |||
205 | fid = NULL; | 230 | fid = NULL; |
206 | goto error; | 231 | goto error; |
207 | } | 232 | } |
208 | inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); | 233 | inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); |
209 | if (IS_ERR(inode)) { | 234 | if (IS_ERR(inode)) { |
210 | err = PTR_ERR(inode); | 235 | err = PTR_ERR(inode); |
211 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); | 236 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); |
@@ -219,6 +244,22 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, | |||
219 | /* Now set the ACL based on the default value */ | 244 | /* Now set the ACL based on the default value */ |
220 | v9fs_set_create_acl(dentry, dacl, pacl); | 245 | v9fs_set_create_acl(dentry, dacl, pacl); |
221 | 246 | ||
247 | v9inode = V9FS_I(inode); | ||
248 | if (v9ses->cache && !v9inode->writeback_fid) { | ||
249 | /* | ||
250 | * clone a fid and add it to writeback_fid | ||
251 | * we do it during open time instead of | ||
252 | * page dirty time via write_begin/page_mkwrite | ||
253 | * because we want the write-after-unlink use case | ||
254 | * to work. | ||
255 | */ | ||
256 | inode_fid = v9fs_writeback_fid(dentry); | ||
257 | if (IS_ERR(inode_fid)) { | ||
258 | err = PTR_ERR(inode_fid); | ||
259 | goto error; | ||
260 | } | ||
261 | v9inode->writeback_fid = (void *) inode_fid; | ||
262 | } | ||
222 | /* Since we are opening a file, assign the open fid to the file */ | 263 | /* Since we are opening a file, assign the open fid to the file */ |
223 | filp = lookup_instantiate_filp(nd, dentry, generic_file_open); | 264 | filp = lookup_instantiate_filp(nd, dentry, generic_file_open); |
224 | if (IS_ERR(filp)) { | 265 | if (IS_ERR(filp)) { |
@@ -226,6 +267,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, | |||
226 | return PTR_ERR(filp); | 267 | return PTR_ERR(filp); |
227 | } | 268 | } |
228 | filp->private_data = ofid; | 269 | filp->private_data = ofid; |
270 | #ifdef CONFIG_9P_FSCACHE | ||
271 | if (v9ses->cache) | ||
272 | v9fs_cache_inode_set_cookie(inode, filp); | ||
273 | #endif | ||
229 | return 0; | 274 | return 0; |
230 | 275 | ||
231 | error: | 276 | error: |
@@ -300,7 +345,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, | |||
300 | goto error; | 345 | goto error; |
301 | } | 346 | } |
302 | 347 | ||
303 | inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); | 348 | inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); |
304 | if (IS_ERR(inode)) { | 349 | if (IS_ERR(inode)) { |
305 | err = PTR_ERR(inode); | 350 | err = PTR_ERR(inode); |
306 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", | 351 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", |
@@ -327,7 +372,8 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, | |||
327 | } | 372 | } |
328 | /* Now set the ACL based on the default value */ | 373 | /* Now set the ACL based on the default value */ |
329 | v9fs_set_create_acl(dentry, dacl, pacl); | 374 | v9fs_set_create_acl(dentry, dacl, pacl); |
330 | 375 | inc_nlink(dir); | |
376 | v9fs_invalidate_inode_attr(dir); | ||
331 | error: | 377 | error: |
332 | if (fid) | 378 | if (fid) |
333 | p9_client_clunk(fid); | 379 | p9_client_clunk(fid); |
@@ -346,9 +392,10 @@ v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, | |||
346 | P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); | 392 | P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry); |
347 | err = -EPERM; | 393 | err = -EPERM; |
348 | v9ses = v9fs_inode2v9ses(dentry->d_inode); | 394 | v9ses = v9fs_inode2v9ses(dentry->d_inode); |
349 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) | 395 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { |
350 | return simple_getattr(mnt, dentry, stat); | 396 | generic_fillattr(dentry->d_inode, stat); |
351 | 397 | return 0; | |
398 | } | ||
352 | fid = v9fs_fid_lookup(dentry); | 399 | fid = v9fs_fid_lookup(dentry); |
353 | if (IS_ERR(fid)) | 400 | if (IS_ERR(fid)) |
354 | return PTR_ERR(fid); | 401 | return PTR_ERR(fid); |
@@ -406,16 +453,20 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) | |||
406 | if (IS_ERR(fid)) | 453 | if (IS_ERR(fid)) |
407 | return PTR_ERR(fid); | 454 | return PTR_ERR(fid); |
408 | 455 | ||
409 | retval = p9_client_setattr(fid, &p9attr); | ||
410 | if (retval < 0) | ||
411 | return retval; | ||
412 | |||
413 | if ((iattr->ia_valid & ATTR_SIZE) && | 456 | if ((iattr->ia_valid & ATTR_SIZE) && |
414 | iattr->ia_size != i_size_read(dentry->d_inode)) { | 457 | iattr->ia_size != i_size_read(dentry->d_inode)) { |
415 | retval = vmtruncate(dentry->d_inode, iattr->ia_size); | 458 | retval = vmtruncate(dentry->d_inode, iattr->ia_size); |
416 | if (retval) | 459 | if (retval) |
417 | return retval; | 460 | return retval; |
418 | } | 461 | } |
462 | /* Write all dirty data */ | ||
463 | if (S_ISREG(dentry->d_inode->i_mode)) | ||
464 | filemap_write_and_wait(dentry->d_inode->i_mapping); | ||
465 | |||
466 | retval = p9_client_setattr(fid, &p9attr); | ||
467 | if (retval < 0) | ||
468 | return retval; | ||
469 | v9fs_invalidate_inode_attr(dentry->d_inode); | ||
419 | 470 | ||
420 | setattr_copy(dentry->d_inode, iattr); | 471 | setattr_copy(dentry->d_inode, iattr); |
421 | mark_inode_dirty(dentry->d_inode); | 472 | mark_inode_dirty(dentry->d_inode); |
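As the reordered hunk above shows, the local truncate and the writeback of dirty pages now happen before the setattr request is sent to the server. A toy sketch of just that ordering, with stub functions invented for illustration:

#include <stdio.h>

/* stubs standing in for the real page-cache and RPC operations */
static void local_truncate(long size)  { printf("truncate page cache to %ld\n", size); }
static void flush_dirty_pages(void)    { printf("write back remaining dirty pages\n"); }
static void send_setattr(long size)    { printf("send setattr(size=%ld) to the server\n", size); }

int main(void)
{
        long new_size = 4096;

        local_truncate(new_size);   /* vmtruncate() in the hunk above */
        flush_dirty_pages();        /* filemap_write_and_wait() */
        send_setattr(new_size);     /* p9_client_setattr() */
        return 0;
}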
@@ -439,6 +490,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) | |||
439 | void | 490 | void |
440 | v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) | 491 | v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) |
441 | { | 492 | { |
493 | struct v9fs_inode *v9inode = V9FS_I(inode); | ||
442 | 494 | ||
443 | if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { | 495 | if ((stat->st_result_mask & P9_STATS_BASIC) == P9_STATS_BASIC) { |
444 | inode->i_atime.tv_sec = stat->st_atime_sec; | 496 | inode->i_atime.tv_sec = stat->st_atime_sec; |
@@ -497,20 +549,21 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode) | |||
497 | /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION | 549 | /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION |
498 | * because the inode structure does not have fields for them. | 550 | * because the inode structure does not have fields for them. |
499 | */ | 551 | */ |
552 | v9inode->cache_validity &= ~V9FS_INO_INVALID_ATTR; | ||
500 | } | 553 | } |
501 | 554 | ||
502 | static int | 555 | static int |
503 | v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, | 556 | v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, |
504 | const char *symname) | 557 | const char *symname) |
505 | { | 558 | { |
506 | struct v9fs_session_info *v9ses; | ||
507 | struct p9_fid *dfid; | ||
508 | struct p9_fid *fid = NULL; | ||
509 | struct inode *inode; | ||
510 | struct p9_qid qid; | ||
511 | char *name; | ||
512 | int err; | 559 | int err; |
513 | gid_t gid; | 560 | gid_t gid; |
561 | char *name; | ||
562 | struct p9_qid qid; | ||
563 | struct inode *inode; | ||
564 | struct p9_fid *dfid; | ||
565 | struct p9_fid *fid = NULL; | ||
566 | struct v9fs_session_info *v9ses; | ||
514 | 567 | ||
515 | name = (char *) dentry->d_name.name; | 568 | name = (char *) dentry->d_name.name; |
516 | P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n", | 569 | P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_symlink_dotl : %lu,%s,%s\n", |
@@ -534,6 +587,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, | |||
534 | goto error; | 587 | goto error; |
535 | } | 588 | } |
536 | 589 | ||
590 | v9fs_invalidate_inode_attr(dir); | ||
537 | if (v9ses->cache) { | 591 | if (v9ses->cache) { |
538 | /* Now walk from the parent so we can get an unopened fid. */ | 592 | /* Now walk from the parent so we can get an unopened fid. */ |
539 | fid = p9_client_walk(dfid, 1, &name, 1); | 593 | fid = p9_client_walk(dfid, 1, &name, 1); |
@@ -546,7 +600,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry, | |||
546 | } | 600 | } |
547 | 601 | ||
548 | /* instantiate inode and assign the unopened fid to dentry */ | 602 | /* instantiate inode and assign the unopened fid to dentry */ |
549 | inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); | 603 | inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); |
550 | if (IS_ERR(inode)) { | 604 | if (IS_ERR(inode)) { |
551 | err = PTR_ERR(inode); | 605 | err = PTR_ERR(inode); |
552 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", | 606 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", |
@@ -588,10 +642,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, | |||
588 | struct dentry *dentry) | 642 | struct dentry *dentry) |
589 | { | 643 | { |
590 | int err; | 644 | int err; |
591 | struct p9_fid *dfid, *oldfid; | ||
592 | char *name; | 645 | char *name; |
593 | struct v9fs_session_info *v9ses; | ||
594 | struct dentry *dir_dentry; | 646 | struct dentry *dir_dentry; |
647 | struct p9_fid *dfid, *oldfid; | ||
648 | struct v9fs_session_info *v9ses; | ||
595 | 649 | ||
596 | P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", | 650 | P9_DPRINTK(P9_DEBUG_VFS, "dir ino: %lu, old_name: %s, new_name: %s\n", |
597 | dir->i_ino, old_dentry->d_name.name, | 651 | dir->i_ino, old_dentry->d_name.name, |
@@ -616,29 +670,17 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, | |||
616 | return err; | 670 | return err; |
617 | } | 671 | } |
618 | 672 | ||
673 | v9fs_invalidate_inode_attr(dir); | ||
619 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { | 674 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { |
620 | /* Get the latest stat info from server. */ | 675 | /* Get the latest stat info from server. */ |
621 | struct p9_fid *fid; | 676 | struct p9_fid *fid; |
622 | struct p9_stat_dotl *st; | ||
623 | |||
624 | fid = v9fs_fid_lookup(old_dentry); | 677 | fid = v9fs_fid_lookup(old_dentry); |
625 | if (IS_ERR(fid)) | 678 | if (IS_ERR(fid)) |
626 | return PTR_ERR(fid); | 679 | return PTR_ERR(fid); |
627 | 680 | ||
628 | st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); | 681 | v9fs_refresh_inode_dotl(fid, old_dentry->d_inode); |
629 | if (IS_ERR(st)) | ||
630 | return PTR_ERR(st); | ||
631 | |||
632 | v9fs_stat2inode_dotl(st, old_dentry->d_inode); | ||
633 | |||
634 | kfree(st); | ||
635 | } else { | ||
636 | /* Caching disabled. No need to get upto date stat info. | ||
637 | * This dentry will be released immediately. So, just hold the | ||
638 | * inode | ||
639 | */ | ||
640 | ihold(old_dentry->d_inode); | ||
641 | } | 682 | } |
683 | ihold(old_dentry->d_inode); | ||
642 | d_instantiate(dentry, old_dentry->d_inode); | 684 | d_instantiate(dentry, old_dentry->d_inode); |
643 | 685 | ||
644 | return err; | 686 | return err; |
@@ -657,12 +699,12 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, | |||
657 | dev_t rdev) | 699 | dev_t rdev) |
658 | { | 700 | { |
659 | int err; | 701 | int err; |
702 | gid_t gid; | ||
660 | char *name; | 703 | char *name; |
661 | mode_t mode; | 704 | mode_t mode; |
662 | struct v9fs_session_info *v9ses; | 705 | struct v9fs_session_info *v9ses; |
663 | struct p9_fid *fid = NULL, *dfid = NULL; | 706 | struct p9_fid *fid = NULL, *dfid = NULL; |
664 | struct inode *inode; | 707 | struct inode *inode; |
665 | gid_t gid; | ||
666 | struct p9_qid qid; | 708 | struct p9_qid qid; |
667 | struct dentry *dir_dentry; | 709 | struct dentry *dir_dentry; |
668 | struct posix_acl *dacl = NULL, *pacl = NULL; | 710 | struct posix_acl *dacl = NULL, *pacl = NULL; |
@@ -699,6 +741,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, | |||
699 | if (err < 0) | 741 | if (err < 0) |
700 | goto error; | 742 | goto error; |
701 | 743 | ||
744 | v9fs_invalidate_inode_attr(dir); | ||
702 | /* instantiate inode and assign the unopened fid to the dentry */ | 745 | /* instantiate inode and assign the unopened fid to the dentry */ |
703 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { | 746 | if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) { |
704 | fid = p9_client_walk(dfid, 1, &name, 1); | 747 | fid = p9_client_walk(dfid, 1, &name, 1); |
@@ -710,7 +753,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, | |||
710 | goto error; | 753 | goto error; |
711 | } | 754 | } |
712 | 755 | ||
713 | inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb); | 756 | inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); |
714 | if (IS_ERR(inode)) { | 757 | if (IS_ERR(inode)) { |
715 | err = PTR_ERR(inode); | 758 | err = PTR_ERR(inode); |
716 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", | 759 | P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", |
@@ -782,6 +825,31 @@ ndset: | |||
782 | return NULL; | 825 | return NULL; |
783 | } | 826 | } |
784 | 827 | ||
828 | int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) | ||
829 | { | ||
830 | loff_t i_size; | ||
831 | struct p9_stat_dotl *st; | ||
832 | struct v9fs_session_info *v9ses; | ||
833 | |||
834 | v9ses = v9fs_inode2v9ses(inode); | ||
835 | st = p9_client_getattr_dotl(fid, P9_STATS_ALL); | ||
836 | if (IS_ERR(st)) | ||
837 | return PTR_ERR(st); | ||
838 | |||
839 | spin_lock(&inode->i_lock); | ||
840 | /* | ||
841 | * We don't want to refresh inode->i_size, | ||
842 | * because we may have cached data | ||
843 | */ | ||
844 | i_size = inode->i_size; | ||
845 | v9fs_stat2inode_dotl(st, inode); | ||
846 | if (v9ses->cache) | ||
847 | inode->i_size = i_size; | ||
848 | spin_unlock(&inode->i_lock); | ||
849 | kfree(st); | ||
850 | return 0; | ||
851 | } | ||
852 | |||
785 | const struct inode_operations v9fs_dir_inode_operations_dotl = { | 853 | const struct inode_operations v9fs_dir_inode_operations_dotl = { |
786 | .create = v9fs_vfs_create_dotl, | 854 | .create = v9fs_vfs_create_dotl, |
787 | .lookup = v9fs_vfs_lookup, | 855 | .lookup = v9fs_vfs_lookup, |
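v9fs_refresh_inode_dotl() above copies the server's attributes but deliberately puts the locally known size back when caching is enabled, presumably because not-yet-written-back data can make the local size the more current one. A toy sketch of that pattern (the struct and field names are invented):

#include <stdio.h>

struct toy_inode { long size; long mtime; int cache_enabled; };
struct toy_stat  { long size; long mtime; };

/* refresh attributes from the server, but keep the cached size */
static void refresh_inode(struct toy_inode *ino, const struct toy_stat *st)
{
        long cached_size = ino->size;

        ino->size  = st->size;
        ino->mtime = st->mtime;
        if (ino->cache_enabled)
                ino->size = cached_size;
}

int main(void)
{
        struct toy_inode ino = { .size = 8192, .mtime = 1, .cache_enabled = 1 };
        struct toy_stat  st  = { .size = 4096, .mtime = 2 };

        refresh_inode(&ino, &st);
        printf("size=%ld mtime=%ld\n", ino.size, ino.mtime); /* size stays 8192 */
        return 0;
}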
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index dbaabe3b813..09fd08d1606 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c | |||
@@ -86,12 +86,15 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, | |||
86 | } else | 86 | } else |
87 | sb->s_op = &v9fs_super_ops; | 87 | sb->s_op = &v9fs_super_ops; |
88 | sb->s_bdi = &v9ses->bdi; | 88 | sb->s_bdi = &v9ses->bdi; |
89 | if (v9ses->cache) | ||
90 | sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE; | ||
89 | 91 | ||
90 | sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC | | 92 | sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; |
91 | MS_NOATIME; | 93 | if (!v9ses->cache) |
94 | sb->s_flags |= MS_SYNCHRONOUS; | ||
92 | 95 | ||
93 | #ifdef CONFIG_9P_FS_POSIX_ACL | 96 | #ifdef CONFIG_9P_FS_POSIX_ACL |
94 | if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT) | 97 | if ((v9ses->flags & V9FS_ACL_MASK) == V9FS_POSIX_ACL) |
95 | sb->s_flags |= MS_POSIXACL; | 98 | sb->s_flags |= MS_POSIXACL; |
96 | #endif | 99 | #endif |
97 | 100 | ||
@@ -151,7 +154,6 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, | |||
151 | retval = PTR_ERR(inode); | 154 | retval = PTR_ERR(inode); |
152 | goto release_sb; | 155 | goto release_sb; |
153 | } | 156 | } |
154 | |||
155 | root = d_alloc_root(inode); | 157 | root = d_alloc_root(inode); |
156 | if (!root) { | 158 | if (!root) { |
157 | iput(inode); | 159 | iput(inode); |
@@ -166,7 +168,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, | |||
166 | retval = PTR_ERR(st); | 168 | retval = PTR_ERR(st); |
167 | goto release_sb; | 169 | goto release_sb; |
168 | } | 170 | } |
169 | 171 | root->d_inode->i_ino = v9fs_qid2ino(&st->qid); | |
170 | v9fs_stat2inode_dotl(st, root->d_inode); | 172 | v9fs_stat2inode_dotl(st, root->d_inode); |
171 | kfree(st); | 173 | kfree(st); |
172 | } else { | 174 | } else { |
@@ -183,10 +185,21 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, | |||
183 | p9stat_free(st); | 185 | p9stat_free(st); |
184 | kfree(st); | 186 | kfree(st); |
185 | } | 187 | } |
188 | v9fs_fid_add(root, fid); | ||
186 | retval = v9fs_get_acl(inode, fid); | 189 | retval = v9fs_get_acl(inode, fid); |
187 | if (retval) | 190 | if (retval) |
188 | goto release_sb; | 191 | goto release_sb; |
189 | v9fs_fid_add(root, fid); | 192 | /* |
193 | * Add the root fid to session info. This is used | ||
194 | * for file system sync. We want a cloned fid here | ||
195 | * so that we can do a sync_filesystem after a | ||
196 | * shrink_dcache_for_umount | ||
197 | */ | ||
198 | v9ses->root_fid = v9fs_fid_clone(root); | ||
199 | if (IS_ERR(v9ses->root_fid)) { | ||
200 | retval = PTR_ERR(v9ses->root_fid); | ||
201 | goto release_sb; | ||
202 | } | ||
190 | 203 | ||
191 | P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); | 204 | P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n"); |
192 | return dget(sb->s_root); | 205 | return dget(sb->s_root); |
@@ -197,15 +210,11 @@ close_session: | |||
197 | v9fs_session_close(v9ses); | 210 | v9fs_session_close(v9ses); |
198 | kfree(v9ses); | 211 | kfree(v9ses); |
199 | return ERR_PTR(retval); | 212 | return ERR_PTR(retval); |
200 | |||
201 | release_sb: | 213 | release_sb: |
202 | /* | 214 | /* |
203 | * we will do the session_close and root dentry release | 215 | * we will do the session_close and root dentry |
204 | * in the below call. But we need to clunk fid, because we haven't | 216 | * release in the below call. |
205 | * attached the fid to dentry so it won't get clunked | ||
206 | * automatically. | ||
207 | */ | 217 | */ |
208 | p9_client_clunk(fid); | ||
209 | deactivate_locked_super(sb); | 218 | deactivate_locked_super(sb); |
210 | return ERR_PTR(retval); | 219 | return ERR_PTR(retval); |
211 | } | 220 | } |
@@ -223,7 +232,7 @@ static void v9fs_kill_super(struct super_block *s) | |||
223 | P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); | 232 | P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s); |
224 | 233 | ||
225 | kill_anon_super(s); | 234 | kill_anon_super(s); |
226 | 235 | p9_client_clunk(v9ses->root_fid); | |
227 | v9fs_session_cancel(v9ses); | 236 | v9fs_session_cancel(v9ses); |
228 | v9fs_session_close(v9ses); | 237 | v9fs_session_close(v9ses); |
229 | kfree(v9ses); | 238 | kfree(v9ses); |
@@ -276,11 +285,31 @@ done: | |||
276 | return res; | 285 | return res; |
277 | } | 286 | } |
278 | 287 | ||
288 | static int v9fs_sync_fs(struct super_block *sb, int wait) | ||
289 | { | ||
290 | struct v9fs_session_info *v9ses = sb->s_fs_info; | ||
291 | |||
292 | P9_DPRINTK(P9_DEBUG_VFS, "v9fs_sync_fs: super_block %p\n", sb); | ||
293 | return p9_client_sync_fs(v9ses->root_fid); | ||
294 | } | ||
295 | |||
296 | static int v9fs_drop_inode(struct inode *inode) | ||
297 | { | ||
298 | struct v9fs_session_info *v9ses; | ||
299 | v9ses = v9fs_inode2v9ses(inode); | ||
300 | if (v9ses->cache) | ||
301 | return generic_drop_inode(inode); | ||
302 | /* | ||
303 | * in case of non cached mode always drop the | ||
304 | * inode because we want the inode attributes | ||
305 | * to always match those on the server. | ||
306 | */ | ||
307 | return 1; | ||
308 | } | ||
309 | |||
279 | static const struct super_operations v9fs_super_ops = { | 310 | static const struct super_operations v9fs_super_ops = { |
280 | #ifdef CONFIG_9P_FSCACHE | ||
281 | .alloc_inode = v9fs_alloc_inode, | 311 | .alloc_inode = v9fs_alloc_inode, |
282 | .destroy_inode = v9fs_destroy_inode, | 312 | .destroy_inode = v9fs_destroy_inode, |
283 | #endif | ||
284 | .statfs = simple_statfs, | 313 | .statfs = simple_statfs, |
285 | .evict_inode = v9fs_evict_inode, | 314 | .evict_inode = v9fs_evict_inode, |
286 | .show_options = generic_show_options, | 315 | .show_options = generic_show_options, |
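The new ->sync_fs above is what filesystem-wide sync requests end up calling; from userspace, syncfs(2) (glibc 2.14+) is one way to drive it for a single mount. A minimal sketch, assuming a 9p mount at /mnt/9p (hypothetical path):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* any fd on the mount identifies the filesystem to sync */
        int fd = open("/mnt/9p", O_RDONLY | O_DIRECTORY);
        if (fd < 0) { perror("open"); return 1; }

        if (syncfs(fd) < 0) {   /* ends up in the superblock's ->sync_fs */
                perror("syncfs");
                return 1;
        }
        close(fd);
        return 0;
}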
@@ -288,11 +317,11 @@ static const struct super_operations v9fs_super_ops = { | |||
288 | }; | 317 | }; |
289 | 318 | ||
290 | static const struct super_operations v9fs_super_ops_dotl = { | 319 | static const struct super_operations v9fs_super_ops_dotl = { |
291 | #ifdef CONFIG_9P_FSCACHE | ||
292 | .alloc_inode = v9fs_alloc_inode, | 320 | .alloc_inode = v9fs_alloc_inode, |
293 | .destroy_inode = v9fs_destroy_inode, | 321 | .destroy_inode = v9fs_destroy_inode, |
294 | #endif | 322 | .sync_fs = v9fs_sync_fs, |
295 | .statfs = v9fs_statfs, | 323 | .statfs = v9fs_statfs, |
324 | .drop_inode = v9fs_drop_inode, | ||
296 | .evict_inode = v9fs_evict_inode, | 325 | .evict_inode = v9fs_evict_inode, |
297 | .show_options = generic_show_options, | 326 | .show_options = generic_show_options, |
298 | .umount_begin = v9fs_umount_begin, | 327 | .umount_begin = v9fs_umount_begin, |
@@ -303,5 +332,5 @@ struct file_system_type v9fs_fs_type = { | |||
303 | .mount = v9fs_mount, | 332 | .mount = v9fs_mount, |
304 | .kill_sb = v9fs_kill_super, | 333 | .kill_sb = v9fs_kill_super, |
305 | .owner = THIS_MODULE, | 334 | .owner = THIS_MODULE, |
306 | .fs_flags = FS_RENAME_DOES_D_MOVE, | 335 | .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT, |
307 | }; | 336 | }; |
diff --git a/fs/Kconfig b/fs/Kconfig index 3db9caa57ed..7cb53aafac1 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -47,7 +47,7 @@ config FS_POSIX_ACL | |||
47 | def_bool n | 47 | def_bool n |
48 | 48 | ||
49 | config EXPORTFS | 49 | config EXPORTFS |
50 | tristate | 50 | bool |
51 | 51 | ||
52 | config FILE_LOCKING | 52 | config FILE_LOCKING |
53 | bool "Enable POSIX file locking API" if EXPERT | 53 | bool "Enable POSIX file locking API" if EXPERT |
diff --git a/fs/Makefile b/fs/Makefile index a7f7cef0c0c..ba01202844c 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -48,6 +48,8 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o | |||
48 | obj-$(CONFIG_NFS_COMMON) += nfs_common/ | 48 | obj-$(CONFIG_NFS_COMMON) += nfs_common/ |
49 | obj-$(CONFIG_GENERIC_ACL) += generic_acl.o | 49 | obj-$(CONFIG_GENERIC_ACL) += generic_acl.o |
50 | 50 | ||
51 | obj-$(CONFIG_FHANDLE) += fhandle.o | ||
52 | |||
51 | obj-y += quota/ | 53 | obj-y += quota/ |
52 | 54 | ||
53 | obj-$(CONFIG_PROC_FS) += proc/ | 55 | obj-$(CONFIG_PROC_FS) += proc/ |
diff --git a/fs/aio.c b/fs/aio.c --- a/fs/aio.c +++ b/fs/aio.c | |||
@@ -85,7 +85,7 @@ static int __init aio_setup(void) | |||
85 | kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); | 85 | kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
86 | kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); | 86 | kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); |
87 | 87 | ||
88 | aio_wq = create_workqueue("aio"); | 88 | aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */ |
89 | abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); | 89 | abe_pool = mempool_create_kmalloc_pool(1, sizeof(struct aio_batch_entry)); |
90 | BUG_ON(!aio_wq || !abe_pool); | 90 | BUG_ON(!aio_wq || !abe_pool); |
91 | 91 | ||
@@ -577,7 +577,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) | |||
577 | spin_lock(&fput_lock); | 577 | spin_lock(&fput_lock); |
578 | list_add(&req->ki_list, &fput_head); | 578 | list_add(&req->ki_list, &fput_head); |
579 | spin_unlock(&fput_lock); | 579 | spin_unlock(&fput_lock); |
580 | queue_work(aio_wq, &fput_work); | 580 | schedule_work(&fput_work); |
581 | } else { | 581 | } else { |
582 | req->ki_filp = NULL; | 582 | req->ki_filp = NULL; |
583 | really_put_req(ctx, req); | 583 | really_put_req(ctx, req); |
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index ff27d7a477b..b4ffad859ad 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c | |||
@@ -21,9 +21,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
21 | int len = *max_len; | 21 | int len = *max_len; |
22 | int type; | 22 | int type; |
23 | 23 | ||
24 | if ((len < BTRFS_FID_SIZE_NON_CONNECTABLE) || | 24 | if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) { |
25 | (connectable && len < BTRFS_FID_SIZE_CONNECTABLE)) | 25 | *max_len = BTRFS_FID_SIZE_CONNECTABLE; |
26 | return 255; | 26 | return 255; |
27 | } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { | ||
28 | *max_len = BTRFS_FID_SIZE_NON_CONNECTABLE; | ||
29 | return 255; | ||
30 | } | ||
27 | 31 | ||
28 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; | 32 | len = BTRFS_FID_SIZE_NON_CONNECTABLE; |
29 | type = FILEID_BTRFS_WITHOUT_PARENT; | 33 | type = FILEID_BTRFS_WITHOUT_PARENT; |
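This hunk, like the matching ones in exportfs, fat and fuse further down, makes ->encode_fh tell the caller how much space it actually needs via *max_len before returning 255. A toy encoder following the same buffer-too-small convention (not btrfs code, purely illustrative):

#include <stdio.h>

#define TOY_FID_WORDS 3

/* if the caller's buffer is too small, report the required size and
 * return 255 so the caller can retry with a bigger buffer */
static int toy_encode_fh(unsigned long long ino, unsigned gen,
                         unsigned *fh, int *max_len)
{
        if (*max_len < TOY_FID_WORDS) {
                *max_len = TOY_FID_WORDS;
                return 255;
        }
        fh[0] = (unsigned)(ino & 0xffffffffULL);
        fh[1] = (unsigned)(ino >> 32);
        fh[2] = gen;
        *max_len = TOY_FID_WORDS;
        return 1;               /* toy "fileid type" */
}

int main(void)
{
        unsigned fh[4];
        int len = 1, type;

        if (toy_encode_fh(12345, 7, fh, &len) == 255)
                printf("buffer too small, need %d words\n", len);

        len = 4;
        type = toy_encode_fh(12345, 7, fh, &len);
        printf("type %d, %d words used\n", type, len);
        return 0;
}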
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 93323ac26b0..512c3d1da08 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -4807,9 +4807,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4807 | int err; | 4807 | int err; |
4808 | int drop_inode = 0; | 4808 | int drop_inode = 0; |
4809 | 4809 | ||
4810 | if (inode->i_nlink == 0) | ||
4811 | return -ENOENT; | ||
4812 | |||
4813 | /* do not allow sys_link's with other subvols of the same device */ | 4810 | /* do not allow sys_link's with other subvols of the same device */ |
4814 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 4811 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
4815 | return -EPERM; | 4812 | return -EPERM; |
diff --git a/fs/compat.c b/fs/compat.c index 691c3fd8ce1..c6d31a3bab8 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -262,35 +262,19 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * | |||
262 | */ | 262 | */ |
263 | asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) | 263 | asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) |
264 | { | 264 | { |
265 | struct path path; | 265 | struct kstatfs tmp; |
266 | int error; | 266 | int error = user_statfs(pathname, &tmp); |
267 | 267 | if (!error) | |
268 | error = user_path(pathname, &path); | 268 | error = put_compat_statfs(buf, &tmp); |
269 | if (!error) { | ||
270 | struct kstatfs tmp; | ||
271 | error = vfs_statfs(&path, &tmp); | ||
272 | if (!error) | ||
273 | error = put_compat_statfs(buf, &tmp); | ||
274 | path_put(&path); | ||
275 | } | ||
276 | return error; | 269 | return error; |
277 | } | 270 | } |
278 | 271 | ||
279 | asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) | 272 | asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) |
280 | { | 273 | { |
281 | struct file * file; | ||
282 | struct kstatfs tmp; | 274 | struct kstatfs tmp; |
283 | int error; | 275 | int error = fd_statfs(fd, &tmp); |
284 | |||
285 | error = -EBADF; | ||
286 | file = fget(fd); | ||
287 | if (!file) | ||
288 | goto out; | ||
289 | error = vfs_statfs(&file->f_path, &tmp); | ||
290 | if (!error) | 276 | if (!error) |
291 | error = put_compat_statfs(buf, &tmp); | 277 | error = put_compat_statfs(buf, &tmp); |
292 | fput(file); | ||
293 | out: | ||
294 | return error; | 278 | return error; |
295 | } | 279 | } |
296 | 280 | ||
@@ -329,41 +313,29 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat | |||
329 | 313 | ||
330 | asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) | 314 | asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) |
331 | { | 315 | { |
332 | struct path path; | 316 | struct kstatfs tmp; |
333 | int error; | 317 | int error; |
334 | 318 | ||
335 | if (sz != sizeof(*buf)) | 319 | if (sz != sizeof(*buf)) |
336 | return -EINVAL; | 320 | return -EINVAL; |
337 | 321 | ||
338 | error = user_path(pathname, &path); | 322 | error = user_statfs(pathname, &tmp); |
339 | if (!error) { | 323 | if (!error) |
340 | struct kstatfs tmp; | 324 | error = put_compat_statfs64(buf, &tmp); |
341 | error = vfs_statfs(&path, &tmp); | ||
342 | if (!error) | ||
343 | error = put_compat_statfs64(buf, &tmp); | ||
344 | path_put(&path); | ||
345 | } | ||
346 | return error; | 325 | return error; |
347 | } | 326 | } |
348 | 327 | ||
349 | asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) | 328 | asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) |
350 | { | 329 | { |
351 | struct file * file; | ||
352 | struct kstatfs tmp; | 330 | struct kstatfs tmp; |
353 | int error; | 331 | int error; |
354 | 332 | ||
355 | if (sz != sizeof(*buf)) | 333 | if (sz != sizeof(*buf)) |
356 | return -EINVAL; | 334 | return -EINVAL; |
357 | 335 | ||
358 | error = -EBADF; | 336 | error = fd_statfs(fd, &tmp); |
359 | file = fget(fd); | ||
360 | if (!file) | ||
361 | goto out; | ||
362 | error = vfs_statfs(&file->f_path, &tmp); | ||
363 | if (!error) | 337 | if (!error) |
364 | error = put_compat_statfs64(buf, &tmp); | 338 | error = put_compat_statfs64(buf, &tmp); |
365 | fput(file); | ||
366 | out: | ||
367 | return error; | 339 | return error; |
368 | } | 340 | } |
369 | 341 | ||
@@ -2312,3 +2284,16 @@ asmlinkage long compat_sys_timerfd_gettime(int ufd, | |||
2312 | } | 2284 | } |
2313 | 2285 | ||
2314 | #endif /* CONFIG_TIMERFD */ | 2286 | #endif /* CONFIG_TIMERFD */ |
2287 | |||
2288 | #ifdef CONFIG_FHANDLE | ||
2289 | /* | ||
2290 | * Exactly like fs/open.c:sys_open_by_handle_at(), except that it | ||
2291 | * doesn't set the O_LARGEFILE flag. | ||
2292 | */ | ||
2293 | asmlinkage long | ||
2294 | compat_sys_open_by_handle_at(int mountdirfd, | ||
2295 | struct file_handle __user *handle, int flags) | ||
2296 | { | ||
2297 | return do_handle_open(mountdirfd, handle, flags); | ||
2298 | } | ||
2299 | #endif | ||
diff --git a/fs/dcache.c b/fs/dcache.c index 611ffe928c0..a39fe47c466 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -296,8 +296,12 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) | |||
296 | __releases(parent->d_lock) | 296 | __releases(parent->d_lock) |
297 | __releases(dentry->d_inode->i_lock) | 297 | __releases(dentry->d_inode->i_lock) |
298 | { | 298 | { |
299 | dentry->d_parent = NULL; | ||
300 | list_del(&dentry->d_u.d_child); | 299 | list_del(&dentry->d_u.d_child); |
300 | /* | ||
301 | * Inform try_to_ascend() that we are no longer attached to the | ||
302 | * dentry tree | ||
303 | */ | ||
304 | dentry->d_flags |= DCACHE_DISCONNECTED; | ||
301 | if (parent) | 305 | if (parent) |
302 | spin_unlock(&parent->d_lock); | 306 | spin_unlock(&parent->d_lock); |
303 | dentry_iput(dentry); | 307 | dentry_iput(dentry); |
@@ -1012,6 +1016,35 @@ void shrink_dcache_for_umount(struct super_block *sb) | |||
1012 | } | 1016 | } |
1013 | 1017 | ||
1014 | /* | 1018 | /* |
1019 | * This tries to ascend one level of parenthood, but | ||
1020 | * we can race with renaming, so we need to re-check | ||
1021 | * the parenthood after dropping the lock and check | ||
1022 | * that the sequence number still matches. | ||
1023 | */ | ||
1024 | static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq) | ||
1025 | { | ||
1026 | struct dentry *new = old->d_parent; | ||
1027 | |||
1028 | rcu_read_lock(); | ||
1029 | spin_unlock(&old->d_lock); | ||
1030 | spin_lock(&new->d_lock); | ||
1031 | |||
1032 | /* | ||
1033 | * might go back up the wrong parent if we have had a rename | ||
1034 | * or deletion | ||
1035 | */ | ||
1036 | if (new != old->d_parent || | ||
1037 | (old->d_flags & DCACHE_DISCONNECTED) || | ||
1038 | (!locked && read_seqretry(&rename_lock, seq))) { | ||
1039 | spin_unlock(&new->d_lock); | ||
1040 | new = NULL; | ||
1041 | } | ||
1042 | rcu_read_unlock(); | ||
1043 | return new; | ||
1044 | } | ||
1045 | |||
1046 | |||
1047 | /* | ||
1015 | * Search for at least 1 mount point in the dentry's subdirs. | 1048 | * Search for at least 1 mount point in the dentry's subdirs. |
1016 | * We descend to the next level whenever the d_subdirs | 1049 | * We descend to the next level whenever the d_subdirs |
1017 | * list is non-empty and continue searching. | 1050 | * list is non-empty and continue searching. |
@@ -1066,24 +1099,10 @@ resume: | |||
1066 | * All done at this level ... ascend and resume the search. | 1099 | * All done at this level ... ascend and resume the search. |
1067 | */ | 1100 | */ |
1068 | if (this_parent != parent) { | 1101 | if (this_parent != parent) { |
1069 | struct dentry *tmp; | 1102 | struct dentry *child = this_parent; |
1070 | struct dentry *child; | 1103 | this_parent = try_to_ascend(this_parent, locked, seq); |
1071 | 1104 | if (!this_parent) | |
1072 | tmp = this_parent->d_parent; | ||
1073 | rcu_read_lock(); | ||
1074 | spin_unlock(&this_parent->d_lock); | ||
1075 | child = this_parent; | ||
1076 | this_parent = tmp; | ||
1077 | spin_lock(&this_parent->d_lock); | ||
1078 | /* might go back up the wrong parent if we have had a rename | ||
1079 | * or deletion */ | ||
1080 | if (this_parent != child->d_parent || | ||
1081 | (!locked && read_seqretry(&rename_lock, seq))) { | ||
1082 | spin_unlock(&this_parent->d_lock); | ||
1083 | rcu_read_unlock(); | ||
1084 | goto rename_retry; | 1105 | goto rename_retry; |
1085 | } | ||
1086 | rcu_read_unlock(); | ||
1087 | next = child->d_u.d_child.next; | 1106 | next = child->d_u.d_child.next; |
1088 | goto resume; | 1107 | goto resume; |
1089 | } | 1108 | } |
@@ -1181,24 +1200,10 @@ resume: | |||
1181 | * All done at this level ... ascend and resume the search. | 1200 | * All done at this level ... ascend and resume the search. |
1182 | */ | 1201 | */ |
1183 | if (this_parent != parent) { | 1202 | if (this_parent != parent) { |
1184 | struct dentry *tmp; | 1203 | struct dentry *child = this_parent; |
1185 | struct dentry *child; | 1204 | this_parent = try_to_ascend(this_parent, locked, seq); |
1186 | 1205 | if (!this_parent) | |
1187 | tmp = this_parent->d_parent; | ||
1188 | rcu_read_lock(); | ||
1189 | spin_unlock(&this_parent->d_lock); | ||
1190 | child = this_parent; | ||
1191 | this_parent = tmp; | ||
1192 | spin_lock(&this_parent->d_lock); | ||
1193 | /* might go back up the wrong parent if we have had a rename | ||
1194 | * or deletion */ | ||
1195 | if (this_parent != child->d_parent || | ||
1196 | (!locked && read_seqretry(&rename_lock, seq))) { | ||
1197 | spin_unlock(&this_parent->d_lock); | ||
1198 | rcu_read_unlock(); | ||
1199 | goto rename_retry; | 1206 | goto rename_retry; |
1200 | } | ||
1201 | rcu_read_unlock(); | ||
1202 | next = child->d_u.d_child.next; | 1207 | next = child->d_u.d_child.next; |
1203 | goto resume; | 1208 | goto resume; |
1204 | } | 1209 | } |
@@ -2942,28 +2947,14 @@ resume: | |||
2942 | spin_unlock(&dentry->d_lock); | 2947 | spin_unlock(&dentry->d_lock); |
2943 | } | 2948 | } |
2944 | if (this_parent != root) { | 2949 | if (this_parent != root) { |
2945 | struct dentry *tmp; | 2950 | struct dentry *child = this_parent; |
2946 | struct dentry *child; | ||
2947 | |||
2948 | tmp = this_parent->d_parent; | ||
2949 | if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { | 2951 | if (!(this_parent->d_flags & DCACHE_GENOCIDE)) { |
2950 | this_parent->d_flags |= DCACHE_GENOCIDE; | 2952 | this_parent->d_flags |= DCACHE_GENOCIDE; |
2951 | this_parent->d_count--; | 2953 | this_parent->d_count--; |
2952 | } | 2954 | } |
2953 | rcu_read_lock(); | 2955 | this_parent = try_to_ascend(this_parent, locked, seq); |
2954 | spin_unlock(&this_parent->d_lock); | 2956 | if (!this_parent) |
2955 | child = this_parent; | ||
2956 | this_parent = tmp; | ||
2957 | spin_lock(&this_parent->d_lock); | ||
2958 | /* might go back up the wrong parent if we have had a rename | ||
2959 | * or deletion */ | ||
2960 | if (this_parent != child->d_parent || | ||
2961 | (!locked && read_seqretry(&rename_lock, seq))) { | ||
2962 | spin_unlock(&this_parent->d_lock); | ||
2963 | rcu_read_unlock(); | ||
2964 | goto rename_retry; | 2957 | goto rename_retry; |
2965 | } | ||
2966 | rcu_read_unlock(); | ||
2967 | next = child->d_u.d_child.next; | 2958 | next = child->d_u.d_child.next; |
2968 | goto resume; | 2959 | goto resume; |
2969 | } | 2960 | } |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
@@ -115,13 +115,16 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) | |||
115 | struct file *file; | 115 | struct file *file; |
116 | char *tmp = getname(library); | 116 | char *tmp = getname(library); |
117 | int error = PTR_ERR(tmp); | 117 | int error = PTR_ERR(tmp); |
118 | static const struct open_flags uselib_flags = { | ||
119 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, | ||
120 | .acc_mode = MAY_READ | MAY_EXEC | MAY_OPEN, | ||
121 | .intent = LOOKUP_OPEN | ||
122 | }; | ||
118 | 123 | ||
119 | if (IS_ERR(tmp)) | 124 | if (IS_ERR(tmp)) |
120 | goto out; | 125 | goto out; |
121 | 126 | ||
122 | file = do_filp_open(AT_FDCWD, tmp, | 127 | file = do_filp_open(AT_FDCWD, tmp, &uselib_flags, LOOKUP_FOLLOW); |
123 | O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, | ||
124 | MAY_READ | MAY_EXEC | MAY_OPEN); | ||
125 | putname(tmp); | 128 | putname(tmp); |
126 | error = PTR_ERR(file); | 129 | error = PTR_ERR(file); |
127 | if (IS_ERR(file)) | 130 | if (IS_ERR(file)) |
@@ -721,10 +724,13 @@ struct file *open_exec(const char *name) | |||
721 | { | 724 | { |
722 | struct file *file; | 725 | struct file *file; |
723 | int err; | 726 | int err; |
727 | static const struct open_flags open_exec_flags = { | ||
728 | .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, | ||
729 | .acc_mode = MAY_EXEC | MAY_OPEN, | ||
730 | .intent = LOOKUP_OPEN | ||
731 | }; | ||
724 | 732 | ||
725 | file = do_filp_open(AT_FDCWD, name, | 733 | file = do_filp_open(AT_FDCWD, name, &open_exec_flags, LOOKUP_FOLLOW); |
726 | O_LARGEFILE | O_RDONLY | __FMODE_EXEC, 0, | ||
727 | MAY_EXEC | MAY_OPEN); | ||
728 | if (IS_ERR(file)) | 734 | if (IS_ERR(file)) |
729 | goto out; | 735 | goto out; |
730 | 736 | ||
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 4b6825740dd..b05acb79613 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
@@ -320,9 +320,14 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid, | |||
320 | struct inode * inode = dentry->d_inode; | 320 | struct inode * inode = dentry->d_inode; |
321 | int len = *max_len; | 321 | int len = *max_len; |
322 | int type = FILEID_INO32_GEN; | 322 | int type = FILEID_INO32_GEN; |
323 | 323 | ||
324 | if (len < 2 || (connectable && len < 4)) | 324 | if (connectable && (len < 4)) { |
325 | *max_len = 4; | ||
326 | return 255; | ||
327 | } else if (len < 2) { | ||
328 | *max_len = 2; | ||
325 | return 255; | 329 | return 255; |
330 | } | ||
326 | 331 | ||
327 | len = 2; | 332 | len = 2; |
328 | fid->i32.ino = inode->i_ino; | 333 | fid->i32.ino = inode->i_ino; |
@@ -369,6 +374,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, | |||
369 | /* | 374 | /* |
370 | * Try to get any dentry for the given file handle from the filesystem. | 375 | * Try to get any dentry for the given file handle from the filesystem. |
371 | */ | 376 | */ |
377 | if (!nop || !nop->fh_to_dentry) | ||
378 | return ERR_PTR(-ESTALE); | ||
372 | result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); | 379 | result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); |
373 | if (!result) | 380 | if (!result) |
374 | result = ERR_PTR(-ESTALE); | 381 | result = ERR_PTR(-ESTALE); |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 9dba3bd69d9..0521a007ae6 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -2253,13 +2253,6 @@ static int ext3_link (struct dentry * old_dentry, | |||
2253 | 2253 | ||
2254 | dquot_initialize(dir); | 2254 | dquot_initialize(dir); |
2255 | 2255 | ||
2256 | /* | ||
2257 | * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing | ||
2258 | * otherwise has the potential to corrupt the orphan inode list. | ||
2259 | */ | ||
2260 | if (inode->i_nlink == 0) | ||
2261 | return -ENOENT; | ||
2262 | |||
2263 | retry: | 2256 | retry: |
2264 | handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | 2257 | handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + |
2265 | EXT3_INDEX_EXTRA_TRANS_BLOCKS); | 2258 | EXT3_INDEX_EXTRA_TRANS_BLOCKS); |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 85c8cc8f247..9cc19a1dea8 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -1936,6 +1936,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1936 | sb->s_qcop = &ext3_qctl_operations; | 1936 | sb->s_qcop = &ext3_qctl_operations; |
1937 | sb->dq_op = &ext3_quota_operations; | 1937 | sb->dq_op = &ext3_quota_operations; |
1938 | #endif | 1938 | #endif |
1939 | memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); | ||
1939 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ | 1940 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ |
1940 | mutex_init(&sbi->s_orphan_lock); | 1941 | mutex_init(&sbi->s_orphan_lock); |
1941 | mutex_init(&sbi->s_resize_lock); | 1942 | mutex_init(&sbi->s_resize_lock); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5485390d32c..e781b7ea563 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -2304,13 +2304,6 @@ static int ext4_link(struct dentry *old_dentry, | |||
2304 | 2304 | ||
2305 | dquot_initialize(dir); | 2305 | dquot_initialize(dir); |
2306 | 2306 | ||
2307 | /* | ||
2308 | * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing | ||
2309 | * otherwise has the potential to corrupt the orphan inode list. | ||
2310 | */ | ||
2311 | if (inode->i_nlink == 0) | ||
2312 | return -ENOENT; | ||
2313 | |||
2314 | retry: | 2307 | retry: |
2315 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + | 2308 | handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + |
2316 | EXT4_INDEX_EXTRA_TRANS_BLOCKS); | 2309 | EXT4_INDEX_EXTRA_TRANS_BLOCKS); |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f6a318f836b..203f9e4a70b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -3415,6 +3415,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3415 | sb->s_qcop = &ext4_qctl_operations; | 3415 | sb->s_qcop = &ext4_qctl_operations; |
3416 | sb->dq_op = &ext4_quota_operations; | 3416 | sb->dq_op = &ext4_quota_operations; |
3417 | #endif | 3417 | #endif |
3418 | memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid)); | ||
3419 | |||
3418 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ | 3420 | INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ |
3419 | mutex_init(&sbi->s_orphan_lock); | 3421 | mutex_init(&sbi->s_orphan_lock); |
3420 | mutex_init(&sbi->s_resize_lock); | 3422 | mutex_init(&sbi->s_resize_lock); |
@@ -3509,7 +3511,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3509 | percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); | 3511 | percpu_counter_set(&sbi->s_dirtyblocks_counter, 0); |
3510 | 3512 | ||
3511 | no_journal: | 3513 | no_journal: |
3512 | EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten"); | 3514 | /* |
3515 | * The maximum number of concurrent works can be high and | ||
3516 | * concurrency isn't really necessary. Limit it to 1. | ||
3517 | */ | ||
3518 | EXT4_SB(sb)->dio_unwritten_wq = | ||
3519 | alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1); | ||
3513 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3520 | if (!EXT4_SB(sb)->dio_unwritten_wq) { |
3514 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3521 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); |
3515 | goto failed_mount_wq; | 3522 | goto failed_mount_wq; |
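The comment above explains the alloc_workqueue(..., WQ_MEM_RECLAIM, 1) call: with max_active capped at 1 the queued extent conversions simply run one after another. A rough userspace analogue of that serialization using a single consumer thread (all names invented; this is not the kernel workqueue API):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define QSIZE 8
static void (*queue[QSIZE])(void);
static int head, tail;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

static void queue_work(void (*fn)(void))
{
        pthread_mutex_lock(&lock);
        queue[tail++ % QSIZE] = fn;
        pthread_cond_signal(&cond);
        pthread_mutex_unlock(&lock);
}

static void *worker(void *arg)
{
        (void)arg;
        for (;;) {
                pthread_mutex_lock(&lock);
                while (head == tail)
                        pthread_cond_wait(&cond, &lock);
                void (*fn)(void) = queue[head++ % QSIZE];
                pthread_mutex_unlock(&lock);
                fn();           /* items execute strictly one at a time */
        }
        return NULL;
}

static void convert_extent(void) { puts("converting one unwritten extent"); }

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);
        for (int i = 0; i < 3; i++)
                queue_work(convert_extent);
        sleep(1);               /* let the single worker drain the queue */
        return 0;
}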
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 86753fe10bd..0e277ec4b61 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -757,8 +757,10 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable) | |||
757 | struct inode *inode = de->d_inode; | 757 | struct inode *inode = de->d_inode; |
758 | u32 ipos_h, ipos_m, ipos_l; | 758 | u32 ipos_h, ipos_m, ipos_l; |
759 | 759 | ||
760 | if (len < 5) | 760 | if (len < 5) { |
761 | *lenp = 5; | ||
761 | return 255; /* no room */ | 762 | return 255; /* no room */ |
763 | } | ||
762 | 764 | ||
763 | ipos_h = MSDOS_I(inode)->i_pos >> 8; | 765 | ipos_h = MSDOS_I(inode)->i_pos >> 8; |
764 | ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; | 766 | ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; |
diff --git a/fs/fcntl.c b/fs/fcntl.c index cb1026181bd..6c82e5bac03 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -131,7 +131,7 @@ SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) | |||
131 | SYSCALL_DEFINE1(dup, unsigned int, fildes) | 131 | SYSCALL_DEFINE1(dup, unsigned int, fildes) |
132 | { | 132 | { |
133 | int ret = -EBADF; | 133 | int ret = -EBADF; |
134 | struct file *file = fget(fildes); | 134 | struct file *file = fget_raw(fildes); |
135 | 135 | ||
136 | if (file) { | 136 | if (file) { |
137 | ret = get_unused_fd(); | 137 | ret = get_unused_fd(); |
@@ -426,15 +426,35 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
426 | return err; | 426 | return err; |
427 | } | 427 | } |
428 | 428 | ||
429 | static int check_fcntl_cmd(unsigned cmd) | ||
430 | { | ||
431 | switch (cmd) { | ||
432 | case F_DUPFD: | ||
433 | case F_DUPFD_CLOEXEC: | ||
434 | case F_GETFD: | ||
435 | case F_SETFD: | ||
436 | case F_GETFL: | ||
437 | return 1; | ||
438 | } | ||
439 | return 0; | ||
440 | } | ||
441 | |||
429 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) | 442 | SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) |
430 | { | 443 | { |
431 | struct file *filp; | 444 | struct file *filp; |
432 | long err = -EBADF; | 445 | long err = -EBADF; |
433 | 446 | ||
434 | filp = fget(fd); | 447 | filp = fget_raw(fd); |
435 | if (!filp) | 448 | if (!filp) |
436 | goto out; | 449 | goto out; |
437 | 450 | ||
451 | if (unlikely(filp->f_mode & FMODE_PATH)) { | ||
452 | if (!check_fcntl_cmd(cmd)) { | ||
453 | fput(filp); | ||
454 | goto out; | ||
455 | } | ||
456 | } | ||
457 | |||
438 | err = security_file_fcntl(filp, cmd, arg); | 458 | err = security_file_fcntl(filp, cmd, arg); |
439 | if (err) { | 459 | if (err) { |
440 | fput(filp); | 460 | fput(filp); |
@@ -456,10 +476,17 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, | |||
456 | long err; | 476 | long err; |
457 | 477 | ||
458 | err = -EBADF; | 478 | err = -EBADF; |
459 | filp = fget(fd); | 479 | filp = fget_raw(fd); |
460 | if (!filp) | 480 | if (!filp) |
461 | goto out; | 481 | goto out; |
462 | 482 | ||
483 | if (unlikely(filp->f_mode & FMODE_PATH)) { | ||
484 | if (!check_fcntl_cmd(cmd)) { | ||
485 | fput(filp); | ||
486 | goto out; | ||
487 | } | ||
488 | } | ||
489 | |||
463 | err = security_file_fcntl(filp, cmd, arg); | 490 | err = security_file_fcntl(filp, cmd, arg); |
464 | if (err) { | 491 | if (err) { |
465 | fput(filp); | 492 | fput(filp); |
@@ -808,14 +835,14 @@ static int __init fcntl_init(void) | |||
808 | * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY | 835 | * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY |
809 | * is defined as O_NONBLOCK on some platforms and not on others. | 836 | * is defined as O_NONBLOCK on some platforms and not on others. |
810 | */ | 837 | */ |
811 | BUILD_BUG_ON(18 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( | 838 | BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( |
812 | O_RDONLY | O_WRONLY | O_RDWR | | 839 | O_RDONLY | O_WRONLY | O_RDWR | |
813 | O_CREAT | O_EXCL | O_NOCTTY | | 840 | O_CREAT | O_EXCL | O_NOCTTY | |
814 | O_TRUNC | O_APPEND | /* O_NONBLOCK | */ | 841 | O_TRUNC | O_APPEND | /* O_NONBLOCK | */ |
815 | __O_SYNC | O_DSYNC | FASYNC | | 842 | __O_SYNC | O_DSYNC | FASYNC | |
816 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | | 843 | O_DIRECT | O_LARGEFILE | O_DIRECTORY | |
817 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | | 844 | O_NOFOLLOW | O_NOATIME | O_CLOEXEC | |
818 | __FMODE_EXEC | 845 | __FMODE_EXEC | O_PATH |
819 | )); | 846 | )); |
820 | 847 | ||
821 | fasync_cache = kmem_cache_create("fasync_cache", | 848 | fasync_cache = kmem_cache_create("fasync_cache", |
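The check_fcntl_cmd() whitelist above, together with the fget()/fget_raw() split in file_table.c further down, defines how an O_PATH descriptor behaves from userspace: it can be dup'd, passed to a handful of fcntl commands and used as a path anchor, but not read or written. A small sketch (O_PATH needs Linux 2.6.39+ with _GNU_SOURCE; /etc/hostname is just an example path):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
        char c;
        struct stat st;

        int fd = open("/etc/hostname", O_PATH);
        if (fd < 0) { perror("open(O_PATH)"); return 1; }

        /* ordinary I/O is refused: fget() won't hand out FMODE_PATH files */
        if (read(fd, &c, 1) < 0)
                printf("read: %m\n");          /* EBADF expected */

        /* the whitelisted fcntl commands still work */
        printf("F_GETFL: %d\n", fcntl(fd, F_GETFL));

        /* and the fd still works as a path anchor */
        if (fstatat(fd, "", &st, AT_EMPTY_PATH) == 0)
                printf("size: %lld\n", (long long)st.st_size);

        close(fd);
        return 0;
}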
diff --git a/fs/fhandle.c b/fs/fhandle.c new file mode 100644 index 00000000000..bf93ad2bee0 --- /dev/null +++ b/fs/fhandle.c | |||
@@ -0,0 +1,265 @@ | |||
1 | #include <linux/syscalls.h> | ||
2 | #include <linux/slab.h> | ||
3 | #include <linux/fs.h> | ||
4 | #include <linux/file.h> | ||
5 | #include <linux/mount.h> | ||
6 | #include <linux/namei.h> | ||
7 | #include <linux/exportfs.h> | ||
8 | #include <linux/fs_struct.h> | ||
9 | #include <linux/fsnotify.h> | ||
10 | #include <asm/uaccess.h> | ||
11 | #include "internal.h" | ||
12 | |||
13 | static long do_sys_name_to_handle(struct path *path, | ||
14 | struct file_handle __user *ufh, | ||
15 | int __user *mnt_id) | ||
16 | { | ||
17 | long retval; | ||
18 | struct file_handle f_handle; | ||
19 | int handle_dwords, handle_bytes; | ||
20 | struct file_handle *handle = NULL; | ||
21 | |||
22 | /* | ||
23 | * We need to make sure whether the file system | ||
24 | * supports decoding of the file handle | ||
25 | */ | ||
26 | if (!path->mnt->mnt_sb->s_export_op || | ||
27 | !path->mnt->mnt_sb->s_export_op->fh_to_dentry) | ||
28 | return -EOPNOTSUPP; | ||
29 | |||
30 | if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) | ||
31 | return -EFAULT; | ||
32 | |||
33 | if (f_handle.handle_bytes > MAX_HANDLE_SZ) | ||
34 | return -EINVAL; | ||
35 | |||
36 | handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, | ||
37 | GFP_KERNEL); | ||
38 | if (!handle) | ||
39 | return -ENOMEM; | ||
40 | |||
41 | /* convert handle size to multiple of sizeof(u32) */ | ||
42 | handle_dwords = f_handle.handle_bytes >> 2; | ||
43 | |||
44 | /* we ask for a non connected handle */ | ||
45 | retval = exportfs_encode_fh(path->dentry, | ||
46 | (struct fid *)handle->f_handle, | ||
47 | &handle_dwords, 0); | ||
48 | handle->handle_type = retval; | ||
49 | /* convert handle size to bytes */ | ||
50 | handle_bytes = handle_dwords * sizeof(u32); | ||
51 | handle->handle_bytes = handle_bytes; | ||
52 | if ((handle->handle_bytes > f_handle.handle_bytes) || | ||
53 | (retval == 255) || (retval == -ENOSPC)) { | ||
54 | /* As per old exportfs_encode_fh documentation | ||
55 | * we could return ENOSPC to indicate overflow | ||
56 | * But file systems have always returned 255. So handle | ||
57 | * both values. | ||
58 | */ | ||
59 | /* | ||
60 | * set the handle size to zero so we copy only | ||
61 | * the non-variable part of the file_handle | ||
62 | */ | ||
63 | handle_bytes = 0; | ||
64 | retval = -EOVERFLOW; | ||
65 | } else | ||
66 | retval = 0; | ||
67 | /* copy the mount id */ | ||
68 | if (copy_to_user(mnt_id, &path->mnt->mnt_id, sizeof(*mnt_id)) || | ||
69 | copy_to_user(ufh, handle, | ||
70 | sizeof(struct file_handle) + handle_bytes)) | ||
71 | retval = -EFAULT; | ||
72 | kfree(handle); | ||
73 | return retval; | ||
74 | } | ||
75 | |||
76 | /** | ||
77 | * sys_name_to_handle_at: convert name to handle | ||
78 | * @dfd: directory relative to which name is interpreted if not absolute | ||
79 | * @name: name that should be converted to handle. | ||
80 | * @handle: resulting file handle | ||
81 | * @mnt_id: mount id of the file system containing the file | ||
82 | * @flag: flag value to indicate whether to follow symlink or not | ||
83 | * | ||
84 | * @handle->handle_size indicates the space available to store the | ||
85 | * variable part of the file handle in bytes. If there is not | ||
86 | * enough space, the field is updated to return the minimum | ||
87 | * value required. | ||
88 | */ | ||
89 | SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, | ||
90 | struct file_handle __user *, handle, int __user *, mnt_id, | ||
91 | int, flag) | ||
92 | { | ||
93 | struct path path; | ||
94 | int lookup_flags; | ||
95 | int err; | ||
96 | |||
97 | if ((flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) | ||
98 | return -EINVAL; | ||
99 | |||
100 | lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0; | ||
101 | if (flag & AT_EMPTY_PATH) | ||
102 | lookup_flags |= LOOKUP_EMPTY; | ||
103 | err = user_path_at(dfd, name, lookup_flags, &path); | ||
104 | if (!err) { | ||
105 | err = do_sys_name_to_handle(&path, handle, mnt_id); | ||
106 | path_put(&path); | ||
107 | } | ||
108 | return err; | ||
109 | } | ||
110 | |||
111 | static struct vfsmount *get_vfsmount_from_fd(int fd) | ||
112 | { | ||
113 | struct path path; | ||
114 | |||
115 | if (fd == AT_FDCWD) { | ||
116 | struct fs_struct *fs = current->fs; | ||
117 | spin_lock(&fs->lock); | ||
118 | path = fs->pwd; | ||
119 | mntget(path.mnt); | ||
120 | spin_unlock(&fs->lock); | ||
121 | } else { | ||
122 | int fput_needed; | ||
123 | struct file *file = fget_light(fd, &fput_needed); | ||
124 | if (!file) | ||
125 | return ERR_PTR(-EBADF); | ||
126 | path = file->f_path; | ||
127 | mntget(path.mnt); | ||
128 | fput_light(file, fput_needed); | ||
129 | } | ||
130 | return path.mnt; | ||
131 | } | ||
132 | |||
133 | static int vfs_dentry_acceptable(void *context, struct dentry *dentry) | ||
134 | { | ||
135 | return 1; | ||
136 | } | ||
137 | |||
138 | static int do_handle_to_path(int mountdirfd, struct file_handle *handle, | ||
139 | struct path *path) | ||
140 | { | ||
141 | int retval = 0; | ||
142 | int handle_dwords; | ||
143 | |||
144 | path->mnt = get_vfsmount_from_fd(mountdirfd); | ||
145 | if (IS_ERR(path->mnt)) { | ||
146 | retval = PTR_ERR(path->mnt); | ||
147 | goto out_err; | ||
148 | } | ||
149 | /* change the handle size to multiple of sizeof(u32) */ | ||
150 | handle_dwords = handle->handle_bytes >> 2; | ||
151 | path->dentry = exportfs_decode_fh(path->mnt, | ||
152 | (struct fid *)handle->f_handle, | ||
153 | handle_dwords, handle->handle_type, | ||
154 | vfs_dentry_acceptable, NULL); | ||
155 | if (IS_ERR(path->dentry)) { | ||
156 | retval = PTR_ERR(path->dentry); | ||
157 | goto out_mnt; | ||
158 | } | ||
159 | return 0; | ||
160 | out_mnt: | ||
161 | mntput(path->mnt); | ||
162 | out_err: | ||
163 | return retval; | ||
164 | } | ||
165 | |||
166 | static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, | ||
167 | struct path *path) | ||
168 | { | ||
169 | int retval = 0; | ||
170 | struct file_handle f_handle; | ||
171 | struct file_handle *handle = NULL; | ||
172 | |||
173 | /* | ||
174 | * With a handle we don't look at the execute bit on | ||
175 | * the directory. Ideally we would like CAP_DAC_SEARCH. | ||
176 | * But we don't have that | ||
177 | */ | ||
178 | if (!capable(CAP_DAC_READ_SEARCH)) { | ||
179 | retval = -EPERM; | ||
180 | goto out_err; | ||
181 | } | ||
182 | if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { | ||
183 | retval = -EFAULT; | ||
184 | goto out_err; | ||
185 | } | ||
186 | if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || | ||
187 | (f_handle.handle_bytes == 0)) { | ||
188 | retval = -EINVAL; | ||
189 | goto out_err; | ||
190 | } | ||
191 | handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes, | ||
192 | GFP_KERNEL); | ||
193 | if (!handle) { | ||
194 | retval = -ENOMEM; | ||
195 | goto out_err; | ||
196 | } | ||
197 | /* copy the full handle */ | ||
198 | if (copy_from_user(handle, ufh, | ||
199 | sizeof(struct file_handle) + | ||
200 | f_handle.handle_bytes)) { | ||
201 | retval = -EFAULT; | ||
202 | goto out_handle; | ||
203 | } | ||
204 | |||
205 | retval = do_handle_to_path(mountdirfd, handle, path); | ||
206 | |||
207 | out_handle: | ||
208 | kfree(handle); | ||
209 | out_err: | ||
210 | return retval; | ||
211 | } | ||
212 | |||
213 | long do_handle_open(int mountdirfd, | ||
214 | struct file_handle __user *ufh, int open_flag) | ||
215 | { | ||
216 | long retval = 0; | ||
217 | struct path path; | ||
218 | struct file *file; | ||
219 | int fd; | ||
220 | |||
221 | retval = handle_to_path(mountdirfd, ufh, &path); | ||
222 | if (retval) | ||
223 | return retval; | ||
224 | |||
225 | fd = get_unused_fd_flags(open_flag); | ||
226 | if (fd < 0) { | ||
227 | path_put(&path); | ||
228 | return fd; | ||
229 | } | ||
230 | file = file_open_root(path.dentry, path.mnt, "", open_flag); | ||
231 | if (IS_ERR(file)) { | ||
232 | put_unused_fd(fd); | ||
233 | retval = PTR_ERR(file); | ||
234 | } else { | ||
235 | retval = fd; | ||
236 | fsnotify_open(file); | ||
237 | fd_install(fd, file); | ||
238 | } | ||
239 | path_put(&path); | ||
240 | return retval; | ||
241 | } | ||
242 | |||
243 | /** | ||
244 | * sys_open_by_handle_at: Open the file handle | ||
245 | * @mountdirfd: directory file descriptor | ||
246 | * @handle: file handle to be opened | ||
247 | * @flag: open flags. | ||
248 | * | ||
249 | * @mountdirfd indicates the directory file descriptor | ||
250 | * of the mount point. The file handle is decoded relative | ||
251 | * to the vfsmount pointed to by @mountdirfd. The @flags | ||
252 | * value is the same as the open(2) flags. | ||
253 | */ | ||
254 | SYSCALL_DEFINE3(open_by_handle_at, int, mountdirfd, | ||
255 | struct file_handle __user *, handle, | ||
256 | int, flags) | ||
257 | { | ||
258 | long ret; | ||
259 | |||
260 | if (force_o_largefile()) | ||
261 | flags |= O_LARGEFILE; | ||
262 | |||
263 | ret = do_handle_open(mountdirfd, handle, flags); | ||
264 | return ret; | ||
265 | } | ||
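For the pair of syscalls added here, glibc (2.14+) exposes thin wrappers with the same struct file_handle layout. A hedged sketch of the intended usage, including the EOVERFLOW probe that the handle_bytes update above enables (the path /etc/hostname and the "/" mount fd are placeholders; open_by_handle_at needs CAP_DAC_READ_SEARCH, so run it as root):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
        const char *path = "/etc/hostname";
        struct file_handle *fh;
        int mount_id;

        /* probe: with handle_bytes == 0 the kernel reports the needed size */
        fh = malloc(sizeof(*fh));
        fh->handle_bytes = 0;
        if (name_to_handle_at(AT_FDCWD, path, fh, &mount_id, 0) != -1 ||
            errno != EOVERFLOW) {
                perror("name_to_handle_at probe");
                return 1;
        }

        fh = realloc(fh, sizeof(*fh) + fh->handle_bytes);
        if (name_to_handle_at(AT_FDCWD, path, fh, &mount_id, 0) == -1) {
                perror("name_to_handle_at");
                return 1;
        }
        printf("handle: %u bytes, type %d, mount id %d\n",
               fh->handle_bytes, fh->handle_type, mount_id);

        /* reopen purely from the handle, relative to an fd on the same mount */
        int mfd = open("/", O_RDONLY | O_DIRECTORY);
        int fd = open_by_handle_at(mfd, fh, O_RDONLY);
        if (fd == -1)
                perror("open_by_handle_at");
        else
                close(fd);
        free(fh);
        return 0;
}

The two-step probe is why the overflow path above copies the file_handle header back to userspace even when it refuses to copy the handle itself.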
diff --git a/fs/file_table.c b/fs/file_table.c index cbeec70ee31..bfab973c6c5 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -279,11 +279,10 @@ struct file *fget(unsigned int fd) | |||
279 | rcu_read_lock(); | 279 | rcu_read_lock(); |
280 | file = fcheck_files(files, fd); | 280 | file = fcheck_files(files, fd); |
281 | if (file) { | 281 | if (file) { |
282 | if (!atomic_long_inc_not_zero(&file->f_count)) { | 282 | /* File object ref couldn't be taken */ |
283 | /* File object ref couldn't be taken */ | 283 | if (file->f_mode & FMODE_PATH || |
284 | rcu_read_unlock(); | 284 | !atomic_long_inc_not_zero(&file->f_count)) |
285 | return NULL; | 285 | file = NULL; |
286 | } | ||
287 | } | 286 | } |
288 | rcu_read_unlock(); | 287 | rcu_read_unlock(); |
289 | 288 | ||
@@ -292,6 +291,25 @@ struct file *fget(unsigned int fd) | |||
292 | 291 | ||
293 | EXPORT_SYMBOL(fget); | 292 | EXPORT_SYMBOL(fget); |
294 | 293 | ||
294 | struct file *fget_raw(unsigned int fd) | ||
295 | { | ||
296 | struct file *file; | ||
297 | struct files_struct *files = current->files; | ||
298 | |||
299 | rcu_read_lock(); | ||
300 | file = fcheck_files(files, fd); | ||
301 | if (file) { | ||
302 | /* File object ref couldn't be taken */ | ||
303 | if (!atomic_long_inc_not_zero(&file->f_count)) | ||
304 | file = NULL; | ||
305 | } | ||
306 | rcu_read_unlock(); | ||
307 | |||
308 | return file; | ||
309 | } | ||
310 | |||
311 | EXPORT_SYMBOL(fget_raw); | ||
312 | |||
295 | /* | 313 | /* |
296 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. | 314 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. |
297 | * | 315 | * |
@@ -316,6 +334,33 @@ struct file *fget_light(unsigned int fd, int *fput_needed) | |||
316 | *fput_needed = 0; | 334 | *fput_needed = 0; |
317 | if (atomic_read(&files->count) == 1) { | 335 | if (atomic_read(&files->count) == 1) { |
318 | file = fcheck_files(files, fd); | 336 | file = fcheck_files(files, fd); |
337 | if (file && (file->f_mode & FMODE_PATH)) | ||
338 | file = NULL; | ||
339 | } else { | ||
340 | rcu_read_lock(); | ||
341 | file = fcheck_files(files, fd); | ||
342 | if (file) { | ||
343 | if (!(file->f_mode & FMODE_PATH) && | ||
344 | atomic_long_inc_not_zero(&file->f_count)) | ||
345 | *fput_needed = 1; | ||
346 | else | ||
347 | /* Didn't get the reference: FMODE_PATH fd or file being freed */ | ||
348 | file = NULL; | ||
349 | } | ||
350 | rcu_read_unlock(); | ||
351 | } | ||
352 | |||
353 | return file; | ||
354 | } | ||
355 | |||
356 | struct file *fget_raw_light(unsigned int fd, int *fput_needed) | ||
357 | { | ||
358 | struct file *file; | ||
359 | struct files_struct *files = current->files; | ||
360 | |||
361 | *fput_needed = 0; | ||
362 | if (atomic_read(&files->count) == 1) { | ||
363 | file = fcheck_files(files, fd); | ||
319 | } else { | 364 | } else { |
320 | rcu_read_lock(); | 365 | rcu_read_lock(); |
321 | file = fcheck_files(files, fd); | 366 | file = fcheck_files(files, fd); |
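The fget()/fget_light() changes above make the ordinary fd lookups skip files marked FMODE_PATH, while the new fget_raw()/fget_raw_light() variants still return them; FMODE_PATH is the in-kernel marker for descriptors opened with O_PATH, so only call sites that are safe for such "location only" descriptors should switch to the raw variants. A small userspace sketch of the visible effect (illustrative only; O_PATH and AT_EMPTY_PATH are assumed to be available, and /etc/hostname is just a convenient existing file):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	struct stat st;
	char buf[16];
	int fd = open("/etc/hostname", O_PATH);

	if (fd == -1) {
		perror("open(O_PATH)");
		return 1;
	}

	/* read() resolves the fd via fget_light(), which now rejects FMODE_PATH */
	if (read(fd, buf, sizeof(buf)) == -1)
		perror("read on O_PATH fd");	/* expected: EBADF */

	/* *at()-style lookups may use the raw variants and accept the fd */
	if (fstatat(fd, "", &st, AT_EMPTY_PATH) == 0)
		printf("st_ino = %llu\n", (unsigned long long)st.st_ino);

	close(fd);
	return 0;
}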
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9e3f68cc1bd..051b1a08452 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -637,8 +637,10 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, | |||
637 | u64 nodeid; | 637 | u64 nodeid; |
638 | u32 generation; | 638 | u32 generation; |
639 | 639 | ||
640 | if (*max_len < len) | 640 | if (*max_len < len) { |
641 | *max_len = len; | ||
641 | return 255; | 642 | return 255; |
643 | } | ||
642 | 644 | ||
643 | nodeid = get_fuse_inode(inode)->nodeid; | 645 | nodeid = get_fuse_inode(inode)->nodeid; |
644 | generation = inode->i_generation; | 646 | generation = inode->i_generation; |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 7118f1a780a..cbc07155b1a 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
@@ -80,8 +80,11 @@ int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags) | |||
80 | struct posix_acl *acl; | 80 | struct posix_acl *acl; |
81 | int error; | 81 | int error; |
82 | 82 | ||
83 | if (flags & IPERM_FLAG_RCU) | 83 | if (flags & IPERM_FLAG_RCU) { |
84 | return -ECHILD; | 84 | if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) |
85 | return -ECHILD; | ||
86 | return -EAGAIN; | ||
87 | } | ||
85 | 88 | ||
86 | acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); | 89 | acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); |
87 | if (IS_ERR(acl)) | 90 | if (IS_ERR(acl)) |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 4f36f8832b9..aad77e4f61b 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -695,6 +695,7 @@ out: | |||
695 | if (error == 0) | 695 | if (error == 0) |
696 | return 0; | 696 | return 0; |
697 | 697 | ||
698 | unlock_page(page); | ||
698 | page_cache_release(page); | 699 | page_cache_release(page); |
699 | 700 | ||
700 | gfs2_trans_end(sdp); | 701 | gfs2_trans_end(sdp); |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3c4039d5eef..ef3dc4b9fae 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include "meta_io.h" | 21 | #include "meta_io.h" |
22 | #include "quota.h" | 22 | #include "quota.h" |
23 | #include "rgrp.h" | 23 | #include "rgrp.h" |
24 | #include "super.h" | ||
24 | #include "trans.h" | 25 | #include "trans.h" |
25 | #include "dir.h" | 26 | #include "dir.h" |
26 | #include "util.h" | 27 | #include "util.h" |
@@ -757,7 +758,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
757 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 758 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
758 | struct gfs2_rgrp_list rlist; | 759 | struct gfs2_rgrp_list rlist; |
759 | u64 bn, bstart; | 760 | u64 bn, bstart; |
760 | u32 blen; | 761 | u32 blen, btotal; |
761 | __be64 *p; | 762 | __be64 *p; |
762 | unsigned int rg_blocks = 0; | 763 | unsigned int rg_blocks = 0; |
763 | int metadata; | 764 | int metadata; |
@@ -839,6 +840,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
839 | 840 | ||
840 | bstart = 0; | 841 | bstart = 0; |
841 | blen = 0; | 842 | blen = 0; |
843 | btotal = 0; | ||
842 | 844 | ||
843 | for (p = top; p < bottom; p++) { | 845 | for (p = top; p < bottom; p++) { |
844 | if (!*p) | 846 | if (!*p) |
@@ -851,9 +853,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
851 | else { | 853 | else { |
852 | if (bstart) { | 854 | if (bstart) { |
853 | if (metadata) | 855 | if (metadata) |
854 | gfs2_free_meta(ip, bstart, blen); | 856 | __gfs2_free_meta(ip, bstart, blen); |
855 | else | 857 | else |
856 | gfs2_free_data(ip, bstart, blen); | 858 | __gfs2_free_data(ip, bstart, blen); |
859 | |||
860 | btotal += blen; | ||
857 | } | 861 | } |
858 | 862 | ||
859 | bstart = bn; | 863 | bstart = bn; |
@@ -865,11 +869,17 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
865 | } | 869 | } |
866 | if (bstart) { | 870 | if (bstart) { |
867 | if (metadata) | 871 | if (metadata) |
868 | gfs2_free_meta(ip, bstart, blen); | 872 | __gfs2_free_meta(ip, bstart, blen); |
869 | else | 873 | else |
870 | gfs2_free_data(ip, bstart, blen); | 874 | __gfs2_free_data(ip, bstart, blen); |
875 | |||
876 | btotal += blen; | ||
871 | } | 877 | } |
872 | 878 | ||
879 | gfs2_statfs_change(sdp, 0, +btotal, 0); | ||
880 | gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid, | ||
881 | ip->i_inode.i_gid); | ||
882 | |||
873 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 883 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
874 | 884 | ||
875 | gfs2_dinode_out(ip, dibh->b_data); | 885 | gfs2_dinode_out(ip, dibh->b_data); |
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 9023db8184f..b5a5e60df0d 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c | |||
@@ -36,9 +36,13 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, | |||
36 | struct super_block *sb = inode->i_sb; | 36 | struct super_block *sb = inode->i_sb; |
37 | struct gfs2_inode *ip = GFS2_I(inode); | 37 | struct gfs2_inode *ip = GFS2_I(inode); |
38 | 38 | ||
39 | if (*len < GFS2_SMALL_FH_SIZE || | 39 | if (connectable && (*len < GFS2_LARGE_FH_SIZE)) { |
40 | (connectable && *len < GFS2_LARGE_FH_SIZE)) | 40 | *len = GFS2_LARGE_FH_SIZE; |
41 | return 255; | 41 | return 255; |
42 | } else if (*len < GFS2_SMALL_FH_SIZE) { | ||
43 | *len = GFS2_SMALL_FH_SIZE; | ||
44 | return 255; | ||
45 | } | ||
42 | 46 | ||
43 | fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); | 47 | fh[0] = cpu_to_be32(ip->i_no_formal_ino >> 32); |
44 | fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); | 48 | fh[1] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); |
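The fuse and gfs2 hunks above apply the same convention to ->encode_fh(): when the caller's buffer is too small, the required length is written back through *max_len before returning 255, so exportfs (and, through it, name_to_handle_at()) can report how large the handle needs to be. A generic sketch of that contract, using hypothetical names rather than either filesystem's real constants:

static int example_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len,
			     int connectable)
{
	int len = connectable ? EXAMPLE_FH_LEN_PARENT : EXAMPLE_FH_LEN;

	if (*max_len < len) {
		*max_len = len;		/* tell the caller how much space is needed */
		return 255;		/* exportfs convention for "buffer too small" */
	}

	/* ... fill fh[0..len-1] from dentry->d_inode here ... */

	*max_len = len;
	return connectable ? EXAMPLE_FH_TYPE_PARENT : EXAMPLE_FH_TYPE;
}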
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 7cfdcb91336..4074b952b05 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -448,15 +448,20 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
448 | { | 448 | { |
449 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); | 449 | struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); |
450 | 450 | ||
451 | if (!(file->f_flags & O_NOATIME)) { | 451 | if (!(file->f_flags & O_NOATIME) && |
452 | !IS_NOATIME(&ip->i_inode)) { | ||
452 | struct gfs2_holder i_gh; | 453 | struct gfs2_holder i_gh; |
453 | int error; | 454 | int error; |
454 | 455 | ||
455 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); | 456 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); |
456 | error = gfs2_glock_nq(&i_gh); | 457 | error = gfs2_glock_nq(&i_gh); |
457 | file_accessed(file); | 458 | if (error == 0) { |
458 | if (error == 0) | 459 | file_accessed(file); |
459 | gfs2_glock_dq_uninit(&i_gh); | 460 | gfs2_glock_dq(&i_gh); |
461 | } | ||
462 | gfs2_holder_uninit(&i_gh); | ||
463 | if (error) | ||
464 | return error; | ||
460 | } | 465 | } |
461 | vma->vm_ops = &gfs2_vm_ops; | 466 | vma->vm_ops = &gfs2_vm_ops; |
462 | vma->vm_flags |= VM_CAN_NONLINEAR; | 467 | vma->vm_flags |= VM_CAN_NONLINEAR; |
@@ -617,8 +622,7 @@ static void empty_write_end(struct page *page, unsigned from, | |||
617 | { | 622 | { |
618 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | 623 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); |
619 | 624 | ||
620 | page_zero_new_buffers(page, from, to); | 625 | zero_user(page, from, to-from); |
621 | flush_dcache_page(page); | ||
622 | mark_page_accessed(page); | 626 | mark_page_accessed(page); |
623 | 627 | ||
624 | if (!gfs2_is_writeback(ip)) | 628 | if (!gfs2_is_writeback(ip)) |
@@ -627,36 +631,43 @@ static void empty_write_end(struct page *page, unsigned from, | |||
627 | block_commit_write(page, from, to); | 631 | block_commit_write(page, from, to); |
628 | } | 632 | } |
629 | 633 | ||
630 | static int write_empty_blocks(struct page *page, unsigned from, unsigned to) | 634 | static int needs_empty_write(sector_t block, struct inode *inode) |
631 | { | 635 | { |
632 | unsigned start, end, next; | ||
633 | struct buffer_head *bh, *head; | ||
634 | int error; | 636 | int error; |
637 | struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 }; | ||
635 | 638 | ||
636 | if (!page_has_buffers(page)) { | 639 | bh_map.b_size = 1 << inode->i_blkbits; |
637 | error = __block_write_begin(page, from, to - from, gfs2_block_map); | 640 | error = gfs2_block_map(inode, block, &bh_map, 0); |
638 | if (unlikely(error)) | 641 | if (unlikely(error)) |
639 | return error; | 642 | return error; |
643 | return !buffer_mapped(&bh_map); | ||
644 | } | ||
640 | 645 | ||
641 | empty_write_end(page, from, to); | 646 | static int write_empty_blocks(struct page *page, unsigned from, unsigned to) |
642 | return 0; | 647 | { |
643 | } | 648 | struct inode *inode = page->mapping->host; |
649 | unsigned start, end, next, blksize; | ||
650 | sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
651 | int ret; | ||
644 | 652 | ||
645 | bh = head = page_buffers(page); | 653 | blksize = 1 << inode->i_blkbits; |
646 | next = end = 0; | 654 | next = end = 0; |
647 | while (next < from) { | 655 | while (next < from) { |
648 | next += bh->b_size; | 656 | next += blksize; |
649 | bh = bh->b_this_page; | 657 | block++; |
650 | } | 658 | } |
651 | start = next; | 659 | start = next; |
652 | do { | 660 | do { |
653 | next += bh->b_size; | 661 | next += blksize; |
654 | if (buffer_mapped(bh)) { | 662 | ret = needs_empty_write(block, inode); |
663 | if (unlikely(ret < 0)) | ||
664 | return ret; | ||
665 | if (ret == 0) { | ||
655 | if (end) { | 666 | if (end) { |
656 | error = __block_write_begin(page, start, end - start, | 667 | ret = __block_write_begin(page, start, end - start, |
657 | gfs2_block_map); | 668 | gfs2_block_map); |
658 | if (unlikely(error)) | 669 | if (unlikely(ret)) |
659 | return error; | 670 | return ret; |
660 | empty_write_end(page, start, end); | 671 | empty_write_end(page, start, end); |
661 | end = 0; | 672 | end = 0; |
662 | } | 673 | } |
@@ -664,13 +675,13 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to) | |||
664 | } | 675 | } |
665 | else | 676 | else |
666 | end = next; | 677 | end = next; |
667 | bh = bh->b_this_page; | 678 | block++; |
668 | } while (next < to); | 679 | } while (next < to); |
669 | 680 | ||
670 | if (end) { | 681 | if (end) { |
671 | error = __block_write_begin(page, start, end - start, gfs2_block_map); | 682 | ret = __block_write_begin(page, start, end - start, gfs2_block_map); |
672 | if (unlikely(error)) | 683 | if (unlikely(ret)) |
673 | return error; | 684 | return ret; |
674 | empty_write_end(page, start, end); | 685 | empty_write_end(page, start, end); |
675 | } | 686 | } |
676 | 687 | ||
@@ -976,8 +987,10 @@ static void do_unflock(struct file *file, struct file_lock *fl) | |||
976 | 987 | ||
977 | mutex_lock(&fp->f_fl_mutex); | 988 | mutex_lock(&fp->f_fl_mutex); |
978 | flock_lock_file_wait(file, fl); | 989 | flock_lock_file_wait(file, fl); |
979 | if (fl_gh->gh_gl) | 990 | if (fl_gh->gh_gl) { |
980 | gfs2_glock_dq_uninit(fl_gh); | 991 | gfs2_glock_dq_wait(fl_gh); |
992 | gfs2_holder_uninit(fl_gh); | ||
993 | } | ||
981 | mutex_unlock(&fp->f_fl_mutex); | 994 | mutex_unlock(&fp->f_fl_mutex); |
982 | } | 995 | } |
983 | 996 | ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 7cd9a5a68d5..e2431313491 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -26,6 +26,9 @@ | |||
26 | #include <linux/freezer.h> | 26 | #include <linux/freezer.h> |
27 | #include <linux/workqueue.h> | 27 | #include <linux/workqueue.h> |
28 | #include <linux/jiffies.h> | 28 | #include <linux/jiffies.h> |
29 | #include <linux/rcupdate.h> | ||
30 | #include <linux/rculist_bl.h> | ||
31 | #include <linux/bit_spinlock.h> | ||
29 | 32 | ||
30 | #include "gfs2.h" | 33 | #include "gfs2.h" |
31 | #include "incore.h" | 34 | #include "incore.h" |
@@ -41,10 +44,6 @@ | |||
41 | #define CREATE_TRACE_POINTS | 44 | #define CREATE_TRACE_POINTS |
42 | #include "trace_gfs2.h" | 45 | #include "trace_gfs2.h" |
43 | 46 | ||
44 | struct gfs2_gl_hash_bucket { | ||
45 | struct hlist_head hb_list; | ||
46 | }; | ||
47 | |||
48 | struct gfs2_glock_iter { | 47 | struct gfs2_glock_iter { |
49 | int hash; /* hash bucket index */ | 48 | int hash; /* hash bucket index */ |
50 | struct gfs2_sbd *sdp; /* incore superblock */ | 49 | struct gfs2_sbd *sdp; /* incore superblock */ |
@@ -54,7 +53,6 @@ struct gfs2_glock_iter { | |||
54 | 53 | ||
55 | typedef void (*glock_examiner) (struct gfs2_glock * gl); | 54 | typedef void (*glock_examiner) (struct gfs2_glock * gl); |
56 | 55 | ||
57 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); | ||
58 | static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); | 56 | static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl); |
59 | #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) | 57 | #define GLOCK_BUG_ON(gl,x) do { if (unlikely(x)) { __dump_glock(NULL, gl); BUG(); } } while(0) |
60 | static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); | 58 | static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); |
@@ -70,57 +68,9 @@ static DEFINE_SPINLOCK(lru_lock); | |||
70 | #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) | 68 | #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) |
71 | #define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) | 69 | #define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) |
72 | 70 | ||
73 | static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; | 71 | static struct hlist_bl_head gl_hash_table[GFS2_GL_HASH_SIZE]; |
74 | static struct dentry *gfs2_root; | 72 | static struct dentry *gfs2_root; |
75 | 73 | ||
76 | /* | ||
77 | * Despite what you might think, the numbers below are not arbitrary :-) | ||
78 | * They are taken from the ipv4 routing hash code, which is well tested | ||
79 | * and thus should be nearly optimal. Later on we might tweek the numbers | ||
80 | * but for now this should be fine. | ||
81 | * | ||
82 | * The reason for putting the locks in a separate array from the list heads | ||
83 | * is that we can have fewer locks than list heads and save memory. We use | ||
84 | * the same hash function for both, but with a different hash mask. | ||
85 | */ | ||
86 | #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ | ||
87 | defined(CONFIG_PROVE_LOCKING) | ||
88 | |||
89 | #ifdef CONFIG_LOCKDEP | ||
90 | # define GL_HASH_LOCK_SZ 256 | ||
91 | #else | ||
92 | # if NR_CPUS >= 32 | ||
93 | # define GL_HASH_LOCK_SZ 4096 | ||
94 | # elif NR_CPUS >= 16 | ||
95 | # define GL_HASH_LOCK_SZ 2048 | ||
96 | # elif NR_CPUS >= 8 | ||
97 | # define GL_HASH_LOCK_SZ 1024 | ||
98 | # elif NR_CPUS >= 4 | ||
99 | # define GL_HASH_LOCK_SZ 512 | ||
100 | # else | ||
101 | # define GL_HASH_LOCK_SZ 256 | ||
102 | # endif | ||
103 | #endif | ||
104 | |||
105 | /* We never want more locks than chains */ | ||
106 | #if GFS2_GL_HASH_SIZE < GL_HASH_LOCK_SZ | ||
107 | # undef GL_HASH_LOCK_SZ | ||
108 | # define GL_HASH_LOCK_SZ GFS2_GL_HASH_SIZE | ||
109 | #endif | ||
110 | |||
111 | static rwlock_t gl_hash_locks[GL_HASH_LOCK_SZ]; | ||
112 | |||
113 | static inline rwlock_t *gl_lock_addr(unsigned int x) | ||
114 | { | ||
115 | return &gl_hash_locks[x & (GL_HASH_LOCK_SZ-1)]; | ||
116 | } | ||
117 | #else /* not SMP, so no spinlocks required */ | ||
118 | static inline rwlock_t *gl_lock_addr(unsigned int x) | ||
119 | { | ||
120 | return NULL; | ||
121 | } | ||
122 | #endif | ||
123 | |||
124 | /** | 74 | /** |
125 | * gl_hash() - Turn glock number into hash bucket number | 75 | * gl_hash() - Turn glock number into hash bucket number |
126 | * @lock: The glock number | 76 | * @lock: The glock number |
@@ -141,25 +91,35 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp, | |||
141 | return h; | 91 | return h; |
142 | } | 92 | } |
143 | 93 | ||
144 | /** | 94 | static inline void spin_lock_bucket(unsigned int hash) |
145 | * glock_free() - Perform a few checks and then release struct gfs2_glock | 95 | { |
146 | * @gl: The glock to release | 96 | struct hlist_bl_head *bl = &gl_hash_table[hash]; |
147 | * | 97 | bit_spin_lock(0, (unsigned long *)bl); |
148 | * Also calls lock module to release its internal structure for this glock. | 98 | } |
149 | * | ||
150 | */ | ||
151 | 99 | ||
152 | static void glock_free(struct gfs2_glock *gl) | 100 | static inline void spin_unlock_bucket(unsigned int hash) |
101 | { | ||
102 | struct hlist_bl_head *bl = &gl_hash_table[hash]; | ||
103 | __bit_spin_unlock(0, (unsigned long *)bl); | ||
104 | } | ||
105 | |||
106 | static void gfs2_glock_dealloc(struct rcu_head *rcu) | ||
107 | { | ||
108 | struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu); | ||
109 | |||
110 | if (gl->gl_ops->go_flags & GLOF_ASPACE) | ||
111 | kmem_cache_free(gfs2_glock_aspace_cachep, gl); | ||
112 | else | ||
113 | kmem_cache_free(gfs2_glock_cachep, gl); | ||
114 | } | ||
115 | |||
116 | void gfs2_glock_free(struct gfs2_glock *gl) | ||
153 | { | 117 | { |
154 | struct gfs2_sbd *sdp = gl->gl_sbd; | 118 | struct gfs2_sbd *sdp = gl->gl_sbd; |
155 | struct address_space *mapping = gfs2_glock2aspace(gl); | ||
156 | struct kmem_cache *cachep = gfs2_glock_cachep; | ||
157 | 119 | ||
158 | GLOCK_BUG_ON(gl, mapping && mapping->nrpages); | 120 | call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); |
159 | trace_gfs2_glock_put(gl); | 121 | if (atomic_dec_and_test(&sdp->sd_glock_disposal)) |
160 | if (mapping) | 122 | wake_up(&sdp->sd_glock_wait); |
161 | cachep = gfs2_glock_aspace_cachep; | ||
162 | sdp->sd_lockstruct.ls_ops->lm_put_lock(cachep, gl); | ||
163 | } | 123 | } |
164 | 124 | ||
165 | /** | 125 | /** |
@@ -185,34 +145,49 @@ static int demote_ok(const struct gfs2_glock *gl) | |||
185 | { | 145 | { |
186 | const struct gfs2_glock_operations *glops = gl->gl_ops; | 146 | const struct gfs2_glock_operations *glops = gl->gl_ops; |
187 | 147 | ||
148 | /* assert_spin_locked(&gl->gl_spin); */ | ||
149 | |||
188 | if (gl->gl_state == LM_ST_UNLOCKED) | 150 | if (gl->gl_state == LM_ST_UNLOCKED) |
189 | return 0; | 151 | return 0; |
190 | if (!list_empty(&gl->gl_holders)) | 152 | if (test_bit(GLF_LFLUSH, &gl->gl_flags)) |
153 | return 0; | ||
154 | if ((gl->gl_name.ln_type != LM_TYPE_INODE) && | ||
155 | !list_empty(&gl->gl_holders)) | ||
191 | return 0; | 156 | return 0; |
192 | if (glops->go_demote_ok) | 157 | if (glops->go_demote_ok) |
193 | return glops->go_demote_ok(gl); | 158 | return glops->go_demote_ok(gl); |
194 | return 1; | 159 | return 1; |
195 | } | 160 | } |
196 | 161 | ||
162 | |||
197 | /** | 163 | /** |
198 | * gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list | 164 | * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list |
199 | * @gl: the glock | 165 | * @gl: the glock |
200 | * | 166 | * |
167 | * If the glock is demotable, then we add it (or move it) to the end | ||
168 | * of the glock LRU list. | ||
201 | */ | 169 | */ |
202 | 170 | ||
203 | static void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) | 171 | static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) |
204 | { | 172 | { |
205 | int may_reclaim; | 173 | if (demote_ok(gl)) { |
206 | may_reclaim = (demote_ok(gl) && | 174 | spin_lock(&lru_lock); |
207 | (atomic_read(&gl->gl_ref) == 1 || | 175 | |
208 | (gl->gl_name.ln_type == LM_TYPE_INODE && | 176 | if (!list_empty(&gl->gl_lru)) |
209 | atomic_read(&gl->gl_ref) <= 2))); | 177 | list_del_init(&gl->gl_lru); |
210 | spin_lock(&lru_lock); | 178 | else |
211 | if (list_empty(&gl->gl_lru) && may_reclaim) { | 179 | atomic_inc(&lru_count); |
180 | |||
212 | list_add_tail(&gl->gl_lru, &lru_list); | 181 | list_add_tail(&gl->gl_lru, &lru_list); |
213 | atomic_inc(&lru_count); | 182 | spin_unlock(&lru_lock); |
214 | } | 183 | } |
215 | spin_unlock(&lru_lock); | 184 | } |
185 | |||
186 | void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) | ||
187 | { | ||
188 | spin_lock(&gl->gl_spin); | ||
189 | __gfs2_glock_schedule_for_reclaim(gl); | ||
190 | spin_unlock(&gl->gl_spin); | ||
216 | } | 191 | } |
217 | 192 | ||
218 | /** | 193 | /** |
@@ -227,7 +202,6 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl) | |||
227 | { | 202 | { |
228 | if (atomic_dec_and_test(&gl->gl_ref)) | 203 | if (atomic_dec_and_test(&gl->gl_ref)) |
229 | GLOCK_BUG_ON(gl, 1); | 204 | GLOCK_BUG_ON(gl, 1); |
230 | gfs2_glock_schedule_for_reclaim(gl); | ||
231 | } | 205 | } |
232 | 206 | ||
233 | /** | 207 | /** |
@@ -236,30 +210,26 @@ void gfs2_glock_put_nolock(struct gfs2_glock *gl) | |||
236 | * | 210 | * |
237 | */ | 211 | */ |
238 | 212 | ||
239 | int gfs2_glock_put(struct gfs2_glock *gl) | 213 | void gfs2_glock_put(struct gfs2_glock *gl) |
240 | { | 214 | { |
241 | int rv = 0; | 215 | struct gfs2_sbd *sdp = gl->gl_sbd; |
216 | struct address_space *mapping = gfs2_glock2aspace(gl); | ||
242 | 217 | ||
243 | write_lock(gl_lock_addr(gl->gl_hash)); | 218 | if (atomic_dec_and_test(&gl->gl_ref)) { |
244 | if (atomic_dec_and_lock(&gl->gl_ref, &lru_lock)) { | 219 | spin_lock_bucket(gl->gl_hash); |
245 | hlist_del(&gl->gl_list); | 220 | hlist_bl_del_rcu(&gl->gl_list); |
221 | spin_unlock_bucket(gl->gl_hash); | ||
222 | spin_lock(&lru_lock); | ||
246 | if (!list_empty(&gl->gl_lru)) { | 223 | if (!list_empty(&gl->gl_lru)) { |
247 | list_del_init(&gl->gl_lru); | 224 | list_del_init(&gl->gl_lru); |
248 | atomic_dec(&lru_count); | 225 | atomic_dec(&lru_count); |
249 | } | 226 | } |
250 | spin_unlock(&lru_lock); | 227 | spin_unlock(&lru_lock); |
251 | write_unlock(gl_lock_addr(gl->gl_hash)); | ||
252 | GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); | 228 | GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders)); |
253 | glock_free(gl); | 229 | GLOCK_BUG_ON(gl, mapping && mapping->nrpages); |
254 | rv = 1; | 230 | trace_gfs2_glock_put(gl); |
255 | goto out; | 231 | sdp->sd_lockstruct.ls_ops->lm_put_lock(gl); |
256 | } | 232 | } |
257 | spin_lock(&gl->gl_spin); | ||
258 | gfs2_glock_schedule_for_reclaim(gl); | ||
259 | spin_unlock(&gl->gl_spin); | ||
260 | write_unlock(gl_lock_addr(gl->gl_hash)); | ||
261 | out: | ||
262 | return rv; | ||
263 | } | 233 | } |
264 | 234 | ||
265 | /** | 235 | /** |
@@ -275,17 +245,15 @@ static struct gfs2_glock *search_bucket(unsigned int hash, | |||
275 | const struct lm_lockname *name) | 245 | const struct lm_lockname *name) |
276 | { | 246 | { |
277 | struct gfs2_glock *gl; | 247 | struct gfs2_glock *gl; |
278 | struct hlist_node *h; | 248 | struct hlist_bl_node *h; |
279 | 249 | ||
280 | hlist_for_each_entry(gl, h, &gl_hash_table[hash].hb_list, gl_list) { | 250 | hlist_bl_for_each_entry_rcu(gl, h, &gl_hash_table[hash], gl_list) { |
281 | if (!lm_name_equal(&gl->gl_name, name)) | 251 | if (!lm_name_equal(&gl->gl_name, name)) |
282 | continue; | 252 | continue; |
283 | if (gl->gl_sbd != sdp) | 253 | if (gl->gl_sbd != sdp) |
284 | continue; | 254 | continue; |
285 | 255 | if (atomic_inc_not_zero(&gl->gl_ref)) | |
286 | atomic_inc(&gl->gl_ref); | 256 | return gl; |
287 | |||
288 | return gl; | ||
289 | } | 257 | } |
290 | 258 | ||
291 | return NULL; | 259 | return NULL; |
@@ -743,10 +711,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
743 | struct gfs2_glock *gl, *tmp; | 711 | struct gfs2_glock *gl, *tmp; |
744 | unsigned int hash = gl_hash(sdp, &name); | 712 | unsigned int hash = gl_hash(sdp, &name); |
745 | struct address_space *mapping; | 713 | struct address_space *mapping; |
714 | struct kmem_cache *cachep; | ||
746 | 715 | ||
747 | read_lock(gl_lock_addr(hash)); | 716 | rcu_read_lock(); |
748 | gl = search_bucket(hash, sdp, &name); | 717 | gl = search_bucket(hash, sdp, &name); |
749 | read_unlock(gl_lock_addr(hash)); | 718 | rcu_read_unlock(); |
750 | 719 | ||
751 | *glp = gl; | 720 | *glp = gl; |
752 | if (gl) | 721 | if (gl) |
@@ -755,9 +724,10 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
755 | return -ENOENT; | 724 | return -ENOENT; |
756 | 725 | ||
757 | if (glops->go_flags & GLOF_ASPACE) | 726 | if (glops->go_flags & GLOF_ASPACE) |
758 | gl = kmem_cache_alloc(gfs2_glock_aspace_cachep, GFP_KERNEL); | 727 | cachep = gfs2_glock_aspace_cachep; |
759 | else | 728 | else |
760 | gl = kmem_cache_alloc(gfs2_glock_cachep, GFP_KERNEL); | 729 | cachep = gfs2_glock_cachep; |
730 | gl = kmem_cache_alloc(cachep, GFP_KERNEL); | ||
761 | if (!gl) | 731 | if (!gl) |
762 | return -ENOMEM; | 732 | return -ENOMEM; |
763 | 733 | ||
@@ -790,15 +760,16 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, | |||
790 | mapping->writeback_index = 0; | 760 | mapping->writeback_index = 0; |
791 | } | 761 | } |
792 | 762 | ||
793 | write_lock(gl_lock_addr(hash)); | 763 | spin_lock_bucket(hash); |
794 | tmp = search_bucket(hash, sdp, &name); | 764 | tmp = search_bucket(hash, sdp, &name); |
795 | if (tmp) { | 765 | if (tmp) { |
796 | write_unlock(gl_lock_addr(hash)); | 766 | spin_unlock_bucket(hash); |
797 | glock_free(gl); | 767 | kmem_cache_free(cachep, gl); |
768 | atomic_dec(&sdp->sd_glock_disposal); | ||
798 | gl = tmp; | 769 | gl = tmp; |
799 | } else { | 770 | } else { |
800 | hlist_add_head(&gl->gl_list, &gl_hash_table[hash].hb_list); | 771 | hlist_bl_add_head_rcu(&gl->gl_list, &gl_hash_table[hash]); |
801 | write_unlock(gl_lock_addr(hash)); | 772 | spin_unlock_bucket(hash); |
802 | } | 773 | } |
803 | 774 | ||
804 | *glp = gl; | 775 | *glp = gl; |
@@ -1007,13 +978,13 @@ fail: | |||
1007 | insert_pt = &gh2->gh_list; | 978 | insert_pt = &gh2->gh_list; |
1008 | } | 979 | } |
1009 | set_bit(GLF_QUEUED, &gl->gl_flags); | 980 | set_bit(GLF_QUEUED, &gl->gl_flags); |
981 | trace_gfs2_glock_queue(gh, 1); | ||
1010 | if (likely(insert_pt == NULL)) { | 982 | if (likely(insert_pt == NULL)) { |
1011 | list_add_tail(&gh->gh_list, &gl->gl_holders); | 983 | list_add_tail(&gh->gh_list, &gl->gl_holders); |
1012 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) | 984 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) |
1013 | goto do_cancel; | 985 | goto do_cancel; |
1014 | return; | 986 | return; |
1015 | } | 987 | } |
1016 | trace_gfs2_glock_queue(gh, 1); | ||
1017 | list_add_tail(&gh->gh_list, insert_pt); | 988 | list_add_tail(&gh->gh_list, insert_pt); |
1018 | do_cancel: | 989 | do_cancel: |
1019 | gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); | 990 | gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); |
@@ -1113,6 +1084,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) | |||
1113 | !test_bit(GLF_DEMOTE, &gl->gl_flags)) | 1084 | !test_bit(GLF_DEMOTE, &gl->gl_flags)) |
1114 | fast_path = 1; | 1085 | fast_path = 1; |
1115 | } | 1086 | } |
1087 | __gfs2_glock_schedule_for_reclaim(gl); | ||
1116 | trace_gfs2_glock_queue(gh, 0); | 1088 | trace_gfs2_glock_queue(gh, 0); |
1117 | spin_unlock(&gl->gl_spin); | 1089 | spin_unlock(&gl->gl_spin); |
1118 | if (likely(fast_path)) | 1090 | if (likely(fast_path)) |
@@ -1276,10 +1248,8 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs) | |||
1276 | 1248 | ||
1277 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) | 1249 | void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) |
1278 | { | 1250 | { |
1279 | unsigned int x; | 1251 | while (num_gh--) |
1280 | 1252 | gfs2_glock_dq(&ghs[num_gh]); | |
1281 | for (x = 0; x < num_gh; x++) | ||
1282 | gfs2_glock_dq(&ghs[x]); | ||
1283 | } | 1253 | } |
1284 | 1254 | ||
1285 | /** | 1255 | /** |
@@ -1291,10 +1261,8 @@ void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs) | |||
1291 | 1261 | ||
1292 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) | 1262 | void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs) |
1293 | { | 1263 | { |
1294 | unsigned int x; | 1264 | while (num_gh--) |
1295 | 1265 | gfs2_glock_dq_uninit(&ghs[num_gh]); | |
1296 | for (x = 0; x < num_gh; x++) | ||
1297 | gfs2_glock_dq_uninit(&ghs[x]); | ||
1298 | } | 1266 | } |
1299 | 1267 | ||
1300 | void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) | 1268 | void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) |
@@ -1440,42 +1408,30 @@ static struct shrinker glock_shrinker = { | |||
1440 | * @sdp: the filesystem | 1408 | * @sdp: the filesystem |
1441 | * @bucket: the bucket | 1409 | * @bucket: the bucket |
1442 | * | 1410 | * |
1443 | * Returns: 1 if the bucket has entries | ||
1444 | */ | 1411 | */ |
1445 | 1412 | ||
1446 | static int examine_bucket(glock_examiner examiner, struct gfs2_sbd *sdp, | 1413 | static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, |
1447 | unsigned int hash) | 1414 | unsigned int hash) |
1448 | { | 1415 | { |
1449 | struct gfs2_glock *gl, *prev = NULL; | 1416 | struct gfs2_glock *gl; |
1450 | int has_entries = 0; | 1417 | struct hlist_bl_head *head = &gl_hash_table[hash]; |
1451 | struct hlist_head *head = &gl_hash_table[hash].hb_list; | 1418 | struct hlist_bl_node *pos; |
1452 | 1419 | ||
1453 | read_lock(gl_lock_addr(hash)); | 1420 | rcu_read_lock(); |
1454 | /* Can't use hlist_for_each_entry - don't want prefetch here */ | 1421 | hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { |
1455 | if (hlist_empty(head)) | 1422 | if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref)) |
1456 | goto out; | ||
1457 | gl = list_entry(head->first, struct gfs2_glock, gl_list); | ||
1458 | while(1) { | ||
1459 | if (!sdp || gl->gl_sbd == sdp) { | ||
1460 | gfs2_glock_hold(gl); | ||
1461 | read_unlock(gl_lock_addr(hash)); | ||
1462 | if (prev) | ||
1463 | gfs2_glock_put(prev); | ||
1464 | prev = gl; | ||
1465 | examiner(gl); | 1423 | examiner(gl); |
1466 | has_entries = 1; | ||
1467 | read_lock(gl_lock_addr(hash)); | ||
1468 | } | ||
1469 | if (gl->gl_list.next == NULL) | ||
1470 | break; | ||
1471 | gl = list_entry(gl->gl_list.next, struct gfs2_glock, gl_list); | ||
1472 | } | 1424 | } |
1473 | out: | 1425 | rcu_read_unlock(); |
1474 | read_unlock(gl_lock_addr(hash)); | ||
1475 | if (prev) | ||
1476 | gfs2_glock_put(prev); | ||
1477 | cond_resched(); | 1426 | cond_resched(); |
1478 | return has_entries; | 1427 | } |
1428 | |||
1429 | static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) | ||
1430 | { | ||
1431 | unsigned x; | ||
1432 | |||
1433 | for (x = 0; x < GFS2_GL_HASH_SIZE; x++) | ||
1434 | examine_bucket(examiner, sdp, x); | ||
1479 | } | 1435 | } |
1480 | 1436 | ||
1481 | 1437 | ||
@@ -1529,10 +1485,21 @@ static void clear_glock(struct gfs2_glock *gl) | |||
1529 | 1485 | ||
1530 | void gfs2_glock_thaw(struct gfs2_sbd *sdp) | 1486 | void gfs2_glock_thaw(struct gfs2_sbd *sdp) |
1531 | { | 1487 | { |
1532 | unsigned x; | 1488 | glock_hash_walk(thaw_glock, sdp); |
1489 | } | ||
1533 | 1490 | ||
1534 | for (x = 0; x < GFS2_GL_HASH_SIZE; x++) | 1491 | static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) |
1535 | examine_bucket(thaw_glock, sdp, x); | 1492 | { |
1493 | int ret; | ||
1494 | spin_lock(&gl->gl_spin); | ||
1495 | ret = __dump_glock(seq, gl); | ||
1496 | spin_unlock(&gl->gl_spin); | ||
1497 | return ret; | ||
1498 | } | ||
1499 | |||
1500 | static void dump_glock_func(struct gfs2_glock *gl) | ||
1501 | { | ||
1502 | dump_glock(NULL, gl); | ||
1536 | } | 1503 | } |
1537 | 1504 | ||
1538 | /** | 1505 | /** |
@@ -1545,13 +1512,10 @@ void gfs2_glock_thaw(struct gfs2_sbd *sdp) | |||
1545 | 1512 | ||
1546 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) | 1513 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) |
1547 | { | 1514 | { |
1548 | unsigned int x; | 1515 | glock_hash_walk(clear_glock, sdp); |
1549 | |||
1550 | for (x = 0; x < GFS2_GL_HASH_SIZE; x++) | ||
1551 | examine_bucket(clear_glock, sdp, x); | ||
1552 | flush_workqueue(glock_workqueue); | 1516 | flush_workqueue(glock_workqueue); |
1553 | wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); | 1517 | wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0); |
1554 | gfs2_dump_lockstate(sdp); | 1518 | glock_hash_walk(dump_glock_func, sdp); |
1555 | } | 1519 | } |
1556 | 1520 | ||
1557 | void gfs2_glock_finish_truncate(struct gfs2_inode *ip) | 1521 | void gfs2_glock_finish_truncate(struct gfs2_inode *ip) |
@@ -1717,66 +1681,15 @@ out: | |||
1717 | return error; | 1681 | return error; |
1718 | } | 1682 | } |
1719 | 1683 | ||
1720 | static int dump_glock(struct seq_file *seq, struct gfs2_glock *gl) | ||
1721 | { | ||
1722 | int ret; | ||
1723 | spin_lock(&gl->gl_spin); | ||
1724 | ret = __dump_glock(seq, gl); | ||
1725 | spin_unlock(&gl->gl_spin); | ||
1726 | return ret; | ||
1727 | } | ||
1728 | 1684 | ||
1729 | /** | ||
1730 | * gfs2_dump_lockstate - print out the current lockstate | ||
1731 | * @sdp: the filesystem | ||
1732 | * @ub: the buffer to copy the information into | ||
1733 | * | ||
1734 | * If @ub is NULL, dump the lockstate to the console. | ||
1735 | * | ||
1736 | */ | ||
1737 | |||
1738 | static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) | ||
1739 | { | ||
1740 | struct gfs2_glock *gl; | ||
1741 | struct hlist_node *h; | ||
1742 | unsigned int x; | ||
1743 | int error = 0; | ||
1744 | |||
1745 | for (x = 0; x < GFS2_GL_HASH_SIZE; x++) { | ||
1746 | |||
1747 | read_lock(gl_lock_addr(x)); | ||
1748 | |||
1749 | hlist_for_each_entry(gl, h, &gl_hash_table[x].hb_list, gl_list) { | ||
1750 | if (gl->gl_sbd != sdp) | ||
1751 | continue; | ||
1752 | |||
1753 | error = dump_glock(NULL, gl); | ||
1754 | if (error) | ||
1755 | break; | ||
1756 | } | ||
1757 | |||
1758 | read_unlock(gl_lock_addr(x)); | ||
1759 | |||
1760 | if (error) | ||
1761 | break; | ||
1762 | } | ||
1763 | |||
1764 | |||
1765 | return error; | ||
1766 | } | ||
1767 | 1685 | ||
1768 | 1686 | ||
1769 | int __init gfs2_glock_init(void) | 1687 | int __init gfs2_glock_init(void) |
1770 | { | 1688 | { |
1771 | unsigned i; | 1689 | unsigned i; |
1772 | for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { | 1690 | for(i = 0; i < GFS2_GL_HASH_SIZE; i++) { |
1773 | INIT_HLIST_HEAD(&gl_hash_table[i].hb_list); | 1691 | INIT_HLIST_BL_HEAD(&gl_hash_table[i]); |
1774 | } | ||
1775 | #ifdef GL_HASH_LOCK_SZ | ||
1776 | for(i = 0; i < GL_HASH_LOCK_SZ; i++) { | ||
1777 | rwlock_init(&gl_hash_locks[i]); | ||
1778 | } | 1692 | } |
1779 | #endif | ||
1780 | 1693 | ||
1781 | glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | | 1694 | glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM | |
1782 | WQ_HIGHPRI | WQ_FREEZABLE, 0); | 1695 | WQ_HIGHPRI | WQ_FREEZABLE, 0); |
@@ -1802,62 +1715,54 @@ void gfs2_glock_exit(void) | |||
1802 | destroy_workqueue(gfs2_delete_workqueue); | 1715 | destroy_workqueue(gfs2_delete_workqueue); |
1803 | } | 1716 | } |
1804 | 1717 | ||
1718 | static inline struct gfs2_glock *glock_hash_chain(unsigned hash) | ||
1719 | { | ||
1720 | return hlist_bl_entry(hlist_bl_first_rcu(&gl_hash_table[hash]), | ||
1721 | struct gfs2_glock, gl_list); | ||
1722 | } | ||
1723 | |||
1724 | static inline struct gfs2_glock *glock_hash_next(struct gfs2_glock *gl) | ||
1725 | { | ||
1726 | return hlist_bl_entry(rcu_dereference(gl->gl_list.next), | ||
1727 | struct gfs2_glock, gl_list); | ||
1728 | } | ||
1729 | |||
1805 | static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) | 1730 | static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) |
1806 | { | 1731 | { |
1807 | struct gfs2_glock *gl; | 1732 | struct gfs2_glock *gl; |
1808 | 1733 | ||
1809 | restart: | 1734 | do { |
1810 | read_lock(gl_lock_addr(gi->hash)); | 1735 | gl = gi->gl; |
1811 | gl = gi->gl; | 1736 | if (gl) { |
1812 | if (gl) { | 1737 | gi->gl = glock_hash_next(gl); |
1813 | gi->gl = hlist_entry(gl->gl_list.next, | 1738 | } else { |
1814 | struct gfs2_glock, gl_list); | 1739 | gi->gl = glock_hash_chain(gi->hash); |
1815 | } else { | 1740 | } |
1816 | gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, | 1741 | while (gi->gl == NULL) { |
1817 | struct gfs2_glock, gl_list); | 1742 | gi->hash++; |
1818 | } | 1743 | if (gi->hash >= GFS2_GL_HASH_SIZE) { |
1819 | if (gi->gl) | 1744 | rcu_read_unlock(); |
1820 | gfs2_glock_hold(gi->gl); | 1745 | return 1; |
1821 | read_unlock(gl_lock_addr(gi->hash)); | 1746 | } |
1822 | if (gl) | 1747 | gi->gl = glock_hash_chain(gi->hash); |
1823 | gfs2_glock_put(gl); | 1748 | } |
1824 | while (gi->gl == NULL) { | 1749 | /* Skip entries for other sb and dead entries */ |
1825 | gi->hash++; | 1750 | } while (gi->sdp != gi->gl->gl_sbd || atomic_read(&gi->gl->gl_ref) == 0); |
1826 | if (gi->hash >= GFS2_GL_HASH_SIZE) | ||
1827 | return 1; | ||
1828 | read_lock(gl_lock_addr(gi->hash)); | ||
1829 | gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, | ||
1830 | struct gfs2_glock, gl_list); | ||
1831 | if (gi->gl) | ||
1832 | gfs2_glock_hold(gi->gl); | ||
1833 | read_unlock(gl_lock_addr(gi->hash)); | ||
1834 | } | ||
1835 | |||
1836 | if (gi->sdp != gi->gl->gl_sbd) | ||
1837 | goto restart; | ||
1838 | 1751 | ||
1839 | return 0; | 1752 | return 0; |
1840 | } | 1753 | } |
1841 | 1754 | ||
1842 | static void gfs2_glock_iter_free(struct gfs2_glock_iter *gi) | ||
1843 | { | ||
1844 | if (gi->gl) | ||
1845 | gfs2_glock_put(gi->gl); | ||
1846 | gi->gl = NULL; | ||
1847 | } | ||
1848 | |||
1849 | static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) | 1755 | static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos) |
1850 | { | 1756 | { |
1851 | struct gfs2_glock_iter *gi = seq->private; | 1757 | struct gfs2_glock_iter *gi = seq->private; |
1852 | loff_t n = *pos; | 1758 | loff_t n = *pos; |
1853 | 1759 | ||
1854 | gi->hash = 0; | 1760 | gi->hash = 0; |
1761 | rcu_read_lock(); | ||
1855 | 1762 | ||
1856 | do { | 1763 | do { |
1857 | if (gfs2_glock_iter_next(gi)) { | 1764 | if (gfs2_glock_iter_next(gi)) |
1858 | gfs2_glock_iter_free(gi); | ||
1859 | return NULL; | 1765 | return NULL; |
1860 | } | ||
1861 | } while (n--); | 1766 | } while (n--); |
1862 | 1767 | ||
1863 | return gi->gl; | 1768 | return gi->gl; |
@@ -1870,10 +1775,8 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, | |||
1870 | 1775 | ||
1871 | (*pos)++; | 1776 | (*pos)++; |
1872 | 1777 | ||
1873 | if (gfs2_glock_iter_next(gi)) { | 1778 | if (gfs2_glock_iter_next(gi)) |
1874 | gfs2_glock_iter_free(gi); | ||
1875 | return NULL; | 1779 | return NULL; |
1876 | } | ||
1877 | 1780 | ||
1878 | return gi->gl; | 1781 | return gi->gl; |
1879 | } | 1782 | } |
@@ -1881,7 +1784,10 @@ static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr, | |||
1881 | static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) | 1784 | static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr) |
1882 | { | 1785 | { |
1883 | struct gfs2_glock_iter *gi = seq->private; | 1786 | struct gfs2_glock_iter *gi = seq->private; |
1884 | gfs2_glock_iter_free(gi); | 1787 | |
1788 | if (gi->gl) | ||
1789 | rcu_read_unlock(); | ||
1790 | gi->gl = NULL; | ||
1885 | } | 1791 | } |
1886 | 1792 | ||
1887 | static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) | 1793 | static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr) |
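Taken together, the glock.c changes above replace the rwlock-protected hash table with RCU-protected hlist_bl buckets: bit 0 of each bucket head doubles as a per-bucket spin lock for writers, readers walk the chain under rcu_read_lock() and only take objects whose refcount can be raised from non-zero, and the final free is deferred through call_rcu(). A stripped-down sketch of that pattern with generic names (struct obj, obj_hash and friends are placeholders, not GFS2's structures; real code would also record the hash in the object rather than pass it around):

#include <linux/atomic.h>
#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
#include <linux/slab.h>
#include <linux/types.h>

#define OBJ_HASH_SIZE 256

struct obj {
	struct hlist_bl_node o_list;
	atomic_t o_ref;
	u64 o_key;
	struct rcu_head o_rcu;
};

static struct hlist_bl_head obj_hash[OBJ_HASH_SIZE];

static void lock_bucket(unsigned int hash)
{
	/* bit 0 of the bucket head pointer is the per-bucket lock */
	bit_spin_lock(0, (unsigned long *)&obj_hash[hash]);
}

static void unlock_bucket(unsigned int hash)
{
	__bit_spin_unlock(0, (unsigned long *)&obj_hash[hash]);
}

static struct obj *obj_find(unsigned int hash, u64 key)
{
	struct obj *o;
	struct hlist_bl_node *pos;

	rcu_read_lock();
	hlist_bl_for_each_entry_rcu(o, pos, &obj_hash[hash], o_list) {
		if (o->o_key != key)
			continue;
		/* a refcount of zero means the object is already being torn down */
		if (atomic_inc_not_zero(&o->o_ref)) {
			rcu_read_unlock();
			return o;
		}
	}
	rcu_read_unlock();
	return NULL;
}

static void obj_insert(struct obj *o, unsigned int hash)
{
	lock_bucket(hash);
	hlist_bl_add_head_rcu(&o->o_list, &obj_hash[hash]);
	unlock_bucket(hash);
}

static void obj_free_rcu(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct obj, o_rcu));
}

static void obj_put(struct obj *o, unsigned int hash)
{
	if (atomic_dec_and_test(&o->o_ref)) {
		lock_bucket(hash);
		hlist_bl_del_rcu(&o->o_list);
		unlock_bucket(hash);
		call_rcu(&o->o_rcu, obj_free_rcu);
	}
}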
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 691851ceb61..aea160690e9 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -118,7 +118,7 @@ struct lm_lockops { | |||
118 | int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); | 118 | int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); |
119 | void (*lm_unmount) (struct gfs2_sbd *sdp); | 119 | void (*lm_unmount) (struct gfs2_sbd *sdp); |
120 | void (*lm_withdraw) (struct gfs2_sbd *sdp); | 120 | void (*lm_withdraw) (struct gfs2_sbd *sdp); |
121 | void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl); | 121 | void (*lm_put_lock) (struct gfs2_glock *gl); |
122 | int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, | 122 | int (*lm_lock) (struct gfs2_glock *gl, unsigned int req_state, |
123 | unsigned int flags); | 123 | unsigned int flags); |
124 | void (*lm_cancel) (struct gfs2_glock *gl); | 124 | void (*lm_cancel) (struct gfs2_glock *gl); |
@@ -174,7 +174,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, | |||
174 | int create, struct gfs2_glock **glp); | 174 | int create, struct gfs2_glock **glp); |
175 | void gfs2_glock_hold(struct gfs2_glock *gl); | 175 | void gfs2_glock_hold(struct gfs2_glock *gl); |
176 | void gfs2_glock_put_nolock(struct gfs2_glock *gl); | 176 | void gfs2_glock_put_nolock(struct gfs2_glock *gl); |
177 | int gfs2_glock_put(struct gfs2_glock *gl); | 177 | void gfs2_glock_put(struct gfs2_glock *gl); |
178 | void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, | 178 | void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, |
179 | struct gfs2_holder *gh); | 179 | struct gfs2_holder *gh); |
180 | void gfs2_holder_reinit(unsigned int state, unsigned flags, | 180 | void gfs2_holder_reinit(unsigned int state, unsigned flags, |
@@ -223,25 +223,22 @@ static inline int gfs2_glock_nq_init(struct gfs2_glock *gl, | |||
223 | return error; | 223 | return error; |
224 | } | 224 | } |
225 | 225 | ||
226 | /* Lock Value Block functions */ | 226 | extern void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); |
227 | 227 | extern void gfs2_glock_complete(struct gfs2_glock *gl, int ret); | |
228 | int gfs2_lvb_hold(struct gfs2_glock *gl); | 228 | extern void gfs2_reclaim_glock(struct gfs2_sbd *sdp); |
229 | void gfs2_lvb_unhold(struct gfs2_glock *gl); | 229 | extern void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); |
230 | 230 | extern void gfs2_glock_finish_truncate(struct gfs2_inode *ip); | |
231 | void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state); | 231 | extern void gfs2_glock_thaw(struct gfs2_sbd *sdp); |
232 | void gfs2_glock_complete(struct gfs2_glock *gl, int ret); | 232 | extern void gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl); |
233 | void gfs2_reclaim_glock(struct gfs2_sbd *sdp); | 233 | extern void gfs2_glock_free(struct gfs2_glock *gl); |
234 | void gfs2_gl_hash_clear(struct gfs2_sbd *sdp); | 234 | |
235 | void gfs2_glock_finish_truncate(struct gfs2_inode *ip); | 235 | extern int __init gfs2_glock_init(void); |
236 | void gfs2_glock_thaw(struct gfs2_sbd *sdp); | 236 | extern void gfs2_glock_exit(void); |
237 | 237 | ||
238 | int __init gfs2_glock_init(void); | 238 | extern int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); |
239 | void gfs2_glock_exit(void); | 239 | extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); |
240 | 240 | extern int gfs2_register_debugfs(void); | |
241 | int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); | 241 | extern void gfs2_unregister_debugfs(void); |
242 | void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); | ||
243 | int gfs2_register_debugfs(void); | ||
244 | void gfs2_unregister_debugfs(void); | ||
245 | 242 | ||
246 | extern const struct lm_lockops gfs2_dlm_ops; | 243 | extern const struct lm_lockops gfs2_dlm_ops; |
247 | 244 | ||
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 263561bf1a5..3754e3cbf02 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -56,20 +56,26 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) | |||
56 | BUG_ON(current->journal_info); | 56 | BUG_ON(current->journal_info); |
57 | current->journal_info = &tr; | 57 | current->journal_info = &tr; |
58 | 58 | ||
59 | gfs2_log_lock(sdp); | 59 | spin_lock(&sdp->sd_ail_lock); |
60 | while (!list_empty(head)) { | 60 | while (!list_empty(head)) { |
61 | bd = list_entry(head->next, struct gfs2_bufdata, | 61 | bd = list_entry(head->next, struct gfs2_bufdata, |
62 | bd_ail_gl_list); | 62 | bd_ail_gl_list); |
63 | bh = bd->bd_bh; | 63 | bh = bd->bd_bh; |
64 | gfs2_remove_from_ail(bd); | 64 | gfs2_remove_from_ail(bd); |
65 | spin_unlock(&sdp->sd_ail_lock); | ||
66 | |||
65 | bd->bd_bh = NULL; | 67 | bd->bd_bh = NULL; |
66 | bh->b_private = NULL; | 68 | bh->b_private = NULL; |
67 | bd->bd_blkno = bh->b_blocknr; | 69 | bd->bd_blkno = bh->b_blocknr; |
70 | gfs2_log_lock(sdp); | ||
68 | gfs2_assert_withdraw(sdp, !buffer_busy(bh)); | 71 | gfs2_assert_withdraw(sdp, !buffer_busy(bh)); |
69 | gfs2_trans_add_revoke(sdp, bd); | 72 | gfs2_trans_add_revoke(sdp, bd); |
73 | gfs2_log_unlock(sdp); | ||
74 | |||
75 | spin_lock(&sdp->sd_ail_lock); | ||
70 | } | 76 | } |
71 | gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); | 77 | gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count)); |
72 | gfs2_log_unlock(sdp); | 78 | spin_unlock(&sdp->sd_ail_lock); |
73 | 79 | ||
74 | gfs2_trans_end(sdp); | 80 | gfs2_trans_end(sdp); |
75 | gfs2_log_flush(sdp, NULL); | 81 | gfs2_log_flush(sdp, NULL); |
@@ -206,8 +212,17 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags) | |||
206 | static int inode_go_demote_ok(const struct gfs2_glock *gl) | 212 | static int inode_go_demote_ok(const struct gfs2_glock *gl) |
207 | { | 213 | { |
208 | struct gfs2_sbd *sdp = gl->gl_sbd; | 214 | struct gfs2_sbd *sdp = gl->gl_sbd; |
215 | struct gfs2_holder *gh; | ||
216 | |||
209 | if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) | 217 | if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object) |
210 | return 0; | 218 | return 0; |
219 | |||
220 | if (!list_empty(&gl->gl_holders)) { | ||
221 | gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list); | ||
222 | if (gh->gh_list.next != &gl->gl_holders) | ||
223 | return 0; | ||
224 | } | ||
225 | |||
211 | return 1; | 226 | return 1; |
212 | } | 227 | } |
213 | 228 | ||
@@ -272,19 +287,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | |||
272 | } | 287 | } |
273 | 288 | ||
274 | /** | 289 | /** |
275 | * rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock | ||
276 | * @gl: the glock | ||
277 | * | ||
278 | * Returns: 1 if it's ok | ||
279 | */ | ||
280 | |||
281 | static int rgrp_go_demote_ok(const struct gfs2_glock *gl) | ||
282 | { | ||
283 | const struct address_space *mapping = (const struct address_space *)(gl + 1); | ||
284 | return !mapping->nrpages; | ||
285 | } | ||
286 | |||
287 | /** | ||
288 | * rgrp_go_lock - operation done after an rgrp lock is locked by | 290 | * rgrp_go_lock - operation done after an rgrp lock is locked by |
289 | * a first holder on this node. | 291 | * a first holder on this node. |
290 | * @gl: the glock | 292 | * @gl: the glock |
@@ -410,7 +412,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = { | |||
410 | const struct gfs2_glock_operations gfs2_rgrp_glops = { | 412 | const struct gfs2_glock_operations gfs2_rgrp_glops = { |
411 | .go_xmote_th = rgrp_go_sync, | 413 | .go_xmote_th = rgrp_go_sync, |
412 | .go_inval = rgrp_go_inval, | 414 | .go_inval = rgrp_go_inval, |
413 | .go_demote_ok = rgrp_go_demote_ok, | ||
414 | .go_lock = rgrp_go_lock, | 415 | .go_lock = rgrp_go_lock, |
415 | .go_unlock = rgrp_go_unlock, | 416 | .go_unlock = rgrp_go_unlock, |
416 | .go_dump = gfs2_rgrp_dump, | 417 | .go_dump = gfs2_rgrp_dump, |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index a79790c0627..870a89d6d4d 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -15,6 +15,8 @@ | |||
15 | #include <linux/workqueue.h> | 15 | #include <linux/workqueue.h> |
16 | #include <linux/dlm.h> | 16 | #include <linux/dlm.h> |
17 | #include <linux/buffer_head.h> | 17 | #include <linux/buffer_head.h> |
18 | #include <linux/rcupdate.h> | ||
19 | #include <linux/rculist_bl.h> | ||
18 | 20 | ||
19 | #define DIO_WAIT 0x00000010 | 21 | #define DIO_WAIT 0x00000010 |
20 | #define DIO_METADATA 0x00000020 | 22 | #define DIO_METADATA 0x00000020 |
@@ -201,7 +203,7 @@ enum { | |||
201 | }; | 203 | }; |
202 | 204 | ||
203 | struct gfs2_glock { | 205 | struct gfs2_glock { |
204 | struct hlist_node gl_list; | 206 | struct hlist_bl_node gl_list; |
205 | unsigned long gl_flags; /* GLF_... */ | 207 | unsigned long gl_flags; /* GLF_... */ |
206 | struct lm_lockname gl_name; | 208 | struct lm_lockname gl_name; |
207 | atomic_t gl_ref; | 209 | atomic_t gl_ref; |
@@ -234,6 +236,7 @@ struct gfs2_glock { | |||
234 | atomic_t gl_ail_count; | 236 | atomic_t gl_ail_count; |
235 | struct delayed_work gl_work; | 237 | struct delayed_work gl_work; |
236 | struct work_struct gl_delete; | 238 | struct work_struct gl_delete; |
239 | struct rcu_head gl_rcu; | ||
237 | }; | 240 | }; |
238 | 241 | ||
239 | #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ | 242 | #define GFS2_MIN_LVB_SIZE 32 /* Min size of LVB that gfs2 supports */ |
@@ -314,6 +317,7 @@ enum { | |||
314 | QDF_USER = 0, | 317 | QDF_USER = 0, |
315 | QDF_CHANGE = 1, | 318 | QDF_CHANGE = 1, |
316 | QDF_LOCKED = 2, | 319 | QDF_LOCKED = 2, |
320 | QDF_REFRESH = 3, | ||
317 | }; | 321 | }; |
318 | 322 | ||
319 | struct gfs2_quota_data { | 323 | struct gfs2_quota_data { |
@@ -647,6 +651,7 @@ struct gfs2_sbd { | |||
647 | unsigned int sd_log_flush_head; | 651 | unsigned int sd_log_flush_head; |
648 | u64 sd_log_flush_wrapped; | 652 | u64 sd_log_flush_wrapped; |
649 | 653 | ||
654 | spinlock_t sd_ail_lock; | ||
650 | struct list_head sd_ail1_list; | 655 | struct list_head sd_ail1_list; |
651 | struct list_head sd_ail2_list; | 656 | struct list_head sd_ail2_list; |
652 | u64 sd_ail_sync_gen; | 657 | u64 sd_ail_sync_gen; |
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 6e493aee28f..98c80d8c2a6 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
@@ -22,7 +22,6 @@ static void gdlm_ast(void *arg) | |||
22 | { | 22 | { |
23 | struct gfs2_glock *gl = arg; | 23 | struct gfs2_glock *gl = arg; |
24 | unsigned ret = gl->gl_state; | 24 | unsigned ret = gl->gl_state; |
25 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
26 | 25 | ||
27 | BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); | 26 | BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); |
28 | 27 | ||
@@ -31,12 +30,7 @@ static void gdlm_ast(void *arg) | |||
31 | 30 | ||
32 | switch (gl->gl_lksb.sb_status) { | 31 | switch (gl->gl_lksb.sb_status) { |
33 | case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ | 32 | case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */ |
34 | if (gl->gl_ops->go_flags & GLOF_ASPACE) | 33 | gfs2_glock_free(gl); |
35 | kmem_cache_free(gfs2_glock_aspace_cachep, gl); | ||
36 | else | ||
37 | kmem_cache_free(gfs2_glock_cachep, gl); | ||
38 | if (atomic_dec_and_test(&sdp->sd_glock_disposal)) | ||
39 | wake_up(&sdp->sd_glock_wait); | ||
40 | return; | 34 | return; |
41 | case -DLM_ECANCEL: /* Cancel while getting lock */ | 35 | case -DLM_ECANCEL: /* Cancel while getting lock */ |
42 | ret |= LM_OUT_CANCELED; | 36 | ret |= LM_OUT_CANCELED; |
@@ -164,16 +158,14 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, | |||
164 | GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); | 158 | GDLM_STRNAME_BYTES - 1, 0, gdlm_ast, gl, gdlm_bast); |
165 | } | 159 | } |
166 | 160 | ||
167 | static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) | 161 | static void gdlm_put_lock(struct gfs2_glock *gl) |
168 | { | 162 | { |
169 | struct gfs2_sbd *sdp = gl->gl_sbd; | 163 | struct gfs2_sbd *sdp = gl->gl_sbd; |
170 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 164 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
171 | int error; | 165 | int error; |
172 | 166 | ||
173 | if (gl->gl_lksb.sb_lkid == 0) { | 167 | if (gl->gl_lksb.sb_lkid == 0) { |
174 | kmem_cache_free(cachep, gl); | 168 | gfs2_glock_free(gl); |
175 | if (atomic_dec_and_test(&sdp->sd_glock_disposal)) | ||
176 | wake_up(&sdp->sd_glock_wait); | ||
177 | return; | 169 | return; |
178 | } | 170 | } |
179 | 171 | ||
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index eb01f3575e1..e7ed31f858d 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -67,7 +67,7 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct, | |||
67 | * @mapping: The associated mapping (maybe NULL) | 67 | * @mapping: The associated mapping (maybe NULL) |
68 | * @bd: The gfs2_bufdata to remove | 68 | * @bd: The gfs2_bufdata to remove |
69 | * | 69 | * |
70 | * The log lock _must_ be held when calling this function | 70 | * The ail lock _must_ be held when calling this function |
71 | * | 71 | * |
72 | */ | 72 | */ |
73 | 73 | ||
@@ -88,8 +88,8 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd) | |||
88 | */ | 88 | */ |
89 | 89 | ||
90 | static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) | 90 | static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) |
91 | __releases(&sdp->sd_log_lock) | 91 | __releases(&sdp->sd_ail_lock) |
92 | __acquires(&sdp->sd_log_lock) | 92 | __acquires(&sdp->sd_ail_lock) |
93 | { | 93 | { |
94 | struct gfs2_bufdata *bd, *s; | 94 | struct gfs2_bufdata *bd, *s; |
95 | struct buffer_head *bh; | 95 | struct buffer_head *bh; |
@@ -117,7 +117,7 @@ __acquires(&sdp->sd_log_lock) | |||
117 | list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); | 117 | list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); |
118 | 118 | ||
119 | get_bh(bh); | 119 | get_bh(bh); |
120 | gfs2_log_unlock(sdp); | 120 | spin_unlock(&sdp->sd_ail_lock); |
121 | lock_buffer(bh); | 121 | lock_buffer(bh); |
122 | if (test_clear_buffer_dirty(bh)) { | 122 | if (test_clear_buffer_dirty(bh)) { |
123 | bh->b_end_io = end_buffer_write_sync; | 123 | bh->b_end_io = end_buffer_write_sync; |
@@ -126,7 +126,7 @@ __acquires(&sdp->sd_log_lock) | |||
126 | unlock_buffer(bh); | 126 | unlock_buffer(bh); |
127 | brelse(bh); | 127 | brelse(bh); |
128 | } | 128 | } |
129 | gfs2_log_lock(sdp); | 129 | spin_lock(&sdp->sd_ail_lock); |
130 | 130 | ||
131 | retry = 1; | 131 | retry = 1; |
132 | break; | 132 | break; |
@@ -175,10 +175,10 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp) | |||
175 | struct gfs2_ail *ai; | 175 | struct gfs2_ail *ai; |
176 | int done = 0; | 176 | int done = 0; |
177 | 177 | ||
178 | gfs2_log_lock(sdp); | 178 | spin_lock(&sdp->sd_ail_lock); |
179 | head = &sdp->sd_ail1_list; | 179 | head = &sdp->sd_ail1_list; |
180 | if (list_empty(head)) { | 180 | if (list_empty(head)) { |
181 | gfs2_log_unlock(sdp); | 181 | spin_unlock(&sdp->sd_ail_lock); |
182 | return; | 182 | return; |
183 | } | 183 | } |
184 | sync_gen = sdp->sd_ail_sync_gen++; | 184 | sync_gen = sdp->sd_ail_sync_gen++; |
@@ -189,13 +189,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp) | |||
189 | if (ai->ai_sync_gen >= sync_gen) | 189 | if (ai->ai_sync_gen >= sync_gen) |
190 | continue; | 190 | continue; |
191 | ai->ai_sync_gen = sync_gen; | 191 | ai->ai_sync_gen = sync_gen; |
192 | gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */ | 192 | gfs2_ail1_start_one(sdp, ai); /* This may drop ail lock */ |
193 | done = 0; | 193 | done = 0; |
194 | break; | 194 | break; |
195 | } | 195 | } |
196 | } | 196 | } |
197 | 197 | ||
198 | gfs2_log_unlock(sdp); | 198 | spin_unlock(&sdp->sd_ail_lock); |
199 | } | 199 | } |
200 | 200 | ||
201 | static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) | 201 | static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) |
@@ -203,7 +203,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) | |||
203 | struct gfs2_ail *ai, *s; | 203 | struct gfs2_ail *ai, *s; |
204 | int ret; | 204 | int ret; |
205 | 205 | ||
206 | gfs2_log_lock(sdp); | 206 | spin_lock(&sdp->sd_ail_lock); |
207 | 207 | ||
208 | list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { | 208 | list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { |
209 | if (gfs2_ail1_empty_one(sdp, ai, flags)) | 209 | if (gfs2_ail1_empty_one(sdp, ai, flags)) |
@@ -214,7 +214,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) | |||
214 | 214 | ||
215 | ret = list_empty(&sdp->sd_ail1_list); | 215 | ret = list_empty(&sdp->sd_ail1_list); |
216 | 216 | ||
217 | gfs2_log_unlock(sdp); | 217 | spin_unlock(&sdp->sd_ail_lock); |
218 | 218 | ||
219 | return ret; | 219 | return ret; |
220 | } | 220 | } |
@@ -247,7 +247,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) | |||
247 | int wrap = (new_tail < old_tail); | 247 | int wrap = (new_tail < old_tail); |
248 | int a, b, rm; | 248 | int a, b, rm; |
249 | 249 | ||
250 | gfs2_log_lock(sdp); | 250 | spin_lock(&sdp->sd_ail_lock); |
251 | 251 | ||
252 | list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) { | 252 | list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) { |
253 | a = (old_tail <= ai->ai_first); | 253 | a = (old_tail <= ai->ai_first); |
@@ -263,7 +263,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) | |||
263 | kfree(ai); | 263 | kfree(ai); |
264 | } | 264 | } |
265 | 265 | ||
266 | gfs2_log_unlock(sdp); | 266 | spin_unlock(&sdp->sd_ail_lock); |
267 | } | 267 | } |
268 | 268 | ||
269 | /** | 269 | /** |
@@ -421,7 +421,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp) | |||
421 | struct gfs2_ail *ai; | 421 | struct gfs2_ail *ai; |
422 | unsigned int tail; | 422 | unsigned int tail; |
423 | 423 | ||
424 | gfs2_log_lock(sdp); | 424 | spin_lock(&sdp->sd_ail_lock); |
425 | 425 | ||
426 | if (list_empty(&sdp->sd_ail1_list)) { | 426 | if (list_empty(&sdp->sd_ail1_list)) { |
427 | tail = sdp->sd_log_head; | 427 | tail = sdp->sd_log_head; |
@@ -430,7 +430,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp) | |||
430 | tail = ai->ai_first; | 430 | tail = ai->ai_first; |
431 | } | 431 | } |
432 | 432 | ||
433 | gfs2_log_unlock(sdp); | 433 | spin_unlock(&sdp->sd_ail_lock); |
434 | 434 | ||
435 | return tail; | 435 | return tail; |
436 | } | 436 | } |
@@ -743,10 +743,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
743 | sdp->sd_log_commited_databuf = 0; | 743 | sdp->sd_log_commited_databuf = 0; |
744 | sdp->sd_log_commited_revoke = 0; | 744 | sdp->sd_log_commited_revoke = 0; |
745 | 745 | ||
746 | spin_lock(&sdp->sd_ail_lock); | ||
746 | if (!list_empty(&ai->ai_ail1_list)) { | 747 | if (!list_empty(&ai->ai_ail1_list)) { |
747 | list_add(&ai->ai_list, &sdp->sd_ail1_list); | 748 | list_add(&ai->ai_list, &sdp->sd_ail1_list); |
748 | ai = NULL; | 749 | ai = NULL; |
749 | } | 750 | } |
751 | spin_unlock(&sdp->sd_ail_lock); | ||
750 | gfs2_log_unlock(sdp); | 752 | gfs2_log_unlock(sdp); |
751 | trace_gfs2_log_flush(sdp, 0); | 753 | trace_gfs2_log_flush(sdp, 0); |
752 | up_write(&sdp->sd_log_flush_lock); | 754 | up_write(&sdp->sd_log_flush_lock); |
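The log.c hunks above move the AIL1/AIL2 list handling from the coarse gfs2_log_lock onto the new sd_ail_lock spinlock; where buffer I/O has to be issued, the lock is dropped around lock_buffer(), re-taken afterwards, and the scan restarted. Below is a minimal user-space sketch of that drop-and-rescan pattern; the structure and function names are illustrative stand-ins, not the GFS2 types.

/* Sketch: drop a list lock around slow per-item work, then rescan.
 * Assumption: item/list names are illustrative, not GFS2's structures. */
#include <pthread.h>
#include <stdio.h>

struct item { struct item *next; int dirty; };

static pthread_mutex_t ail_lock = PTHREAD_MUTEX_INITIALIZER;
static struct item *ail1_head;

static void write_item(struct item *it)   /* stands in for submitting buffer I/O */
{
    it->dirty = 0;
}

static void ail1_start(void)
{
    int retry;

    pthread_mutex_lock(&ail_lock);
    do {
        retry = 0;
        for (struct item *it = ail1_head; it; it = it->next) {
            if (!it->dirty)
                continue;
            /* can't hold a spinlock across the (sleeping) write, so
             * drop it, do the work, re-take it and rescan the list */
            pthread_mutex_unlock(&ail_lock);
            write_item(it);
            pthread_mutex_lock(&ail_lock);
            retry = 1;
            break;
        }
    } while (retry);
    pthread_mutex_unlock(&ail_lock);
}

int main(void)
{
    struct item a = { NULL, 1 }, b = { &a, 1 };
    ail1_head = &b;
    ail1_start();
    printf("dirty left: %d %d\n", a.dirty, b.dirty);
    return 0;
}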
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index bf33f822058..e919abf25ec 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -51,8 +51,10 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh) | |||
51 | /* If this buffer is in the AIL and it has already been written | 51 | /* If this buffer is in the AIL and it has already been written |
52 | * to in-place disk block, remove it from the AIL. | 52 | * to in-place disk block, remove it from the AIL. |
53 | */ | 53 | */ |
54 | spin_lock(&sdp->sd_ail_lock); | ||
54 | if (bd->bd_ail) | 55 | if (bd->bd_ail) |
55 | list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); | 56 | list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list); |
57 | spin_unlock(&sdp->sd_ail_lock); | ||
56 | get_bh(bh); | 58 | get_bh(bh); |
57 | atomic_inc(&sdp->sd_log_pinned); | 59 | atomic_inc(&sdp->sd_log_pinned); |
58 | trace_gfs2_pin(bd, 1); | 60 | trace_gfs2_pin(bd, 1); |
@@ -80,7 +82,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
80 | mark_buffer_dirty(bh); | 82 | mark_buffer_dirty(bh); |
81 | clear_buffer_pinned(bh); | 83 | clear_buffer_pinned(bh); |
82 | 84 | ||
83 | gfs2_log_lock(sdp); | 85 | spin_lock(&sdp->sd_ail_lock); |
84 | if (bd->bd_ail) { | 86 | if (bd->bd_ail) { |
85 | list_del(&bd->bd_ail_st_list); | 87 | list_del(&bd->bd_ail_st_list); |
86 | brelse(bh); | 88 | brelse(bh); |
@@ -91,9 +93,11 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
91 | } | 93 | } |
92 | bd->bd_ail = ai; | 94 | bd->bd_ail = ai; |
93 | list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); | 95 | list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list); |
94 | clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags); | 96 | spin_unlock(&sdp->sd_ail_lock); |
97 | |||
98 | if (test_and_clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags)) | ||
99 | gfs2_glock_schedule_for_reclaim(bd->bd_gl); | ||
95 | trace_gfs2_pin(bd, 0); | 100 | trace_gfs2_pin(bd, 0); |
96 | gfs2_log_unlock(sdp); | ||
97 | unlock_buffer(bh); | 101 | unlock_buffer(bh); |
98 | atomic_dec(&sdp->sd_log_pinned); | 102 | atomic_dec(&sdp->sd_log_pinned); |
99 | } | 103 | } |
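In the lops.c hunk, gfs2_unpin() now holds sd_ail_lock only around the AIL list manipulation, and uses test_and_clear_bit() on GLF_LFLUSH so the glock is scheduled for reclaim exactly once, only when the flag was actually set. A small sketch of that test-and-clear idiom follows; the flag and reclaim hook are placeholders.

/* Sketch: act only when a flag transitions from set to clear.
 * Assumption: names are placeholders for the GLF_LFLUSH handling. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int lflush_flag = 1;

static void schedule_for_reclaim(void)
{
    printf("scheduled for reclaim\n");
}

static void unpin(void)
{
    /* test_and_clear_bit(): fetch the old value and clear atomically */
    if (atomic_exchange(&lflush_flag, 0))
        schedule_for_reclaim();     /* runs only for the first caller */
}

int main(void)
{
    unpin();    /* schedules */
    unpin();    /* flag already clear: no double scheduling */
    return 0;
}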
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 72c31a315d9..888a5f5a1a5 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -14,6 +14,8 @@ | |||
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/init.h> | 15 | #include <linux/init.h> |
16 | #include <linux/gfs2_ondisk.h> | 16 | #include <linux/gfs2_ondisk.h> |
17 | #include <linux/rcupdate.h> | ||
18 | #include <linux/rculist_bl.h> | ||
17 | #include <asm/atomic.h> | 19 | #include <asm/atomic.h> |
18 | 20 | ||
19 | #include "gfs2.h" | 21 | #include "gfs2.h" |
@@ -45,7 +47,7 @@ static void gfs2_init_glock_once(void *foo) | |||
45 | { | 47 | { |
46 | struct gfs2_glock *gl = foo; | 48 | struct gfs2_glock *gl = foo; |
47 | 49 | ||
48 | INIT_HLIST_NODE(&gl->gl_list); | 50 | INIT_HLIST_BL_NODE(&gl->gl_list); |
49 | spin_lock_init(&gl->gl_spin); | 51 | spin_lock_init(&gl->gl_spin); |
50 | INIT_LIST_HEAD(&gl->gl_holders); | 52 | INIT_LIST_HEAD(&gl->gl_holders); |
51 | INIT_LIST_HEAD(&gl->gl_lru); | 53 | INIT_LIST_HEAD(&gl->gl_lru); |
@@ -191,6 +193,8 @@ static void __exit exit_gfs2_fs(void) | |||
191 | unregister_filesystem(&gfs2meta_fs_type); | 193 | unregister_filesystem(&gfs2meta_fs_type); |
192 | destroy_workqueue(gfs_recovery_wq); | 194 | destroy_workqueue(gfs_recovery_wq); |
193 | 195 | ||
196 | rcu_barrier(); | ||
197 | |||
194 | kmem_cache_destroy(gfs2_quotad_cachep); | 198 | kmem_cache_destroy(gfs2_quotad_cachep); |
195 | kmem_cache_destroy(gfs2_rgrpd_cachep); | 199 | kmem_cache_destroy(gfs2_rgrpd_cachep); |
196 | kmem_cache_destroy(gfs2_bufdata_cachep); | 200 | kmem_cache_destroy(gfs2_bufdata_cachep); |
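The rcu_barrier() added to exit_gfs2_fs() waits for all pending call_rcu() callbacks (glocks are now freed via RCU) to finish before the slab caches those callbacks free into are destroyed. The sketch below shows the same ordering requirement in user space with the RCU machinery mocked out by a worker thread; the helper names are assumptions, not kernel APIs.

/* Sketch of why a barrier is needed: deferred frees must drain before the
 * allocator they free into goes away. Names here are illustrative only. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_t deferred;

static void *deferred_free(void *obj)      /* stands in for a call_rcu() callback */
{
    printf("deferred free of %p\n", obj);
    free(obj);
    return NULL;
}

static void queue_deferred_free(void *obj) /* stands in for call_rcu() */
{
    pthread_create(&deferred, NULL, deferred_free, obj);
}

static void barrier(void)                  /* stands in for rcu_barrier() */
{
    pthread_join(deferred, NULL);          /* wait for every queued callback */
}

int main(void)
{
    void *glock = malloc(64);

    queue_deferred_free(glock);
    barrier();   /* without this, tearing down the cache could race the free */
    printf("safe to destroy the cache now\n");
    return 0;
}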
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 939739c7b3f..01d97f48655 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -326,6 +326,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
326 | brelse(bh); | 326 | brelse(bh); |
327 | } | 327 | } |
328 | if (bd) { | 328 | if (bd) { |
329 | spin_lock(&sdp->sd_ail_lock); | ||
329 | if (bd->bd_ail) { | 330 | if (bd->bd_ail) { |
330 | gfs2_remove_from_ail(bd); | 331 | gfs2_remove_from_ail(bd); |
331 | bh->b_private = NULL; | 332 | bh->b_private = NULL; |
@@ -333,6 +334,7 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
333 | bd->bd_blkno = bh->b_blocknr; | 334 | bd->bd_blkno = bh->b_blocknr; |
334 | gfs2_trans_add_revoke(sdp, bd); | 335 | gfs2_trans_add_revoke(sdp, bd); |
335 | } | 336 | } |
337 | spin_unlock(&sdp->sd_ail_lock); | ||
336 | } | 338 | } |
337 | clear_buffer_dirty(bh); | 339 | clear_buffer_dirty(bh); |
338 | clear_buffer_uptodate(bh); | 340 | clear_buffer_uptodate(bh); |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 777927ce6f7..42ef24355af 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -99,6 +99,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
99 | 99 | ||
100 | init_waitqueue_head(&sdp->sd_log_waitq); | 100 | init_waitqueue_head(&sdp->sd_log_waitq); |
101 | init_waitqueue_head(&sdp->sd_logd_waitq); | 101 | init_waitqueue_head(&sdp->sd_logd_waitq); |
102 | spin_lock_init(&sdp->sd_ail_lock); | ||
102 | INIT_LIST_HEAD(&sdp->sd_ail1_list); | 103 | INIT_LIST_HEAD(&sdp->sd_ail1_list); |
103 | INIT_LIST_HEAD(&sdp->sd_ail2_list); | 104 | INIT_LIST_HEAD(&sdp->sd_ail2_list); |
104 | 105 | ||
@@ -928,17 +929,9 @@ static const match_table_t nolock_tokens = { | |||
928 | { Opt_err, NULL }, | 929 | { Opt_err, NULL }, |
929 | }; | 930 | }; |
930 | 931 | ||
931 | static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl) | ||
932 | { | ||
933 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
934 | kmem_cache_free(cachep, gl); | ||
935 | if (atomic_dec_and_test(&sdp->sd_glock_disposal)) | ||
936 | wake_up(&sdp->sd_glock_wait); | ||
937 | } | ||
938 | |||
939 | static const struct lm_lockops nolock_ops = { | 932 | static const struct lm_lockops nolock_ops = { |
940 | .lm_proto_name = "lock_nolock", | 933 | .lm_proto_name = "lock_nolock", |
941 | .lm_put_lock = nolock_put_lock, | 934 | .lm_put_lock = gfs2_glock_free, |
942 | .lm_tokens = &nolock_tokens, | 935 | .lm_tokens = &nolock_tokens, |
943 | }; | 936 | }; |
944 | 937 | ||
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index d8b26ac2e20..09e436a5072 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -1026,9 +1026,9 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p) | |||
1026 | 1026 | ||
1027 | /** | 1027 | /** |
1028 | * gfs2_permission - | 1028 | * gfs2_permission - |
1029 | * @inode: | 1029 | * @inode: The inode |
1030 | * @mask: | 1030 | * @mask: The mask to be tested |
1031 | * @nd: passed from Linux VFS, ignored by us | 1031 | * @flags: Indicates whether this is an RCU path walk or not |
1032 | * | 1032 | * |
1033 | * This may be called from the VFS directly, or from within GFS2 with the | 1033 | * This may be called from the VFS directly, or from within GFS2 with the |
1034 | * inode locked, so we look to see if the glock is already locked and only | 1034 | * inode locked, so we look to see if the glock is already locked and only |
@@ -1044,11 +1044,11 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags) | |||
1044 | int error; | 1044 | int error; |
1045 | int unlock = 0; | 1045 | int unlock = 0; |
1046 | 1046 | ||
1047 | if (flags & IPERM_FLAG_RCU) | ||
1048 | return -ECHILD; | ||
1049 | 1047 | ||
1050 | ip = GFS2_I(inode); | 1048 | ip = GFS2_I(inode); |
1051 | if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { | 1049 | if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { |
1050 | if (flags & IPERM_FLAG_RCU) | ||
1051 | return -ECHILD; | ||
1052 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); | 1052 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); |
1053 | if (error) | 1053 | if (error) |
1054 | return error; | 1054 | return error; |
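gfs2_permission() previously bailed out of RCU path walk unconditionally; the hunk above moves the -ECHILD return inside the branch that would actually have to take the glock, so callers that already hold the lock can stay on the lock-free walk. A hedged sketch of that "only leave the fast path when blocking is unavoidable" shape; the types and helpers are simplified placeholders.

/* Sketch: return -ECHILD (retry without RCU) only if a blocking lock is needed.
 * All names here are simplified stand-ins, not the GFS2 definitions. */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define IPERM_FLAG_RCU 0x1

struct inode { bool lock_held_by_me; int mode_ok; };

static int generic_permission(const struct inode *inode)
{
    return inode->mode_ok ? 0 : -EACCES;
}

static int permission(struct inode *inode, unsigned int flags)
{
    if (!inode->lock_held_by_me) {
        if (flags & IPERM_FLAG_RCU)
            return -ECHILD;      /* can't sleep for the lock in RCU walk */
        /* ...take the cluster lock here in the slow path... */
    }
    return generic_permission(inode);
}

int main(void)
{
    struct inode locked = { true, 1 }, unlocked = { false, 1 };

    printf("%d %d\n", permission(&locked, IPERM_FLAG_RCU),   /* 0: stays in RCU walk */
           permission(&unlocked, IPERM_FLAG_RCU));           /* -ECHILD: retry in ref-walk */
    return 0;
}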
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a689901963d..e23d9864c41 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -834,6 +834,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
834 | goto out_end_trans; | 834 | goto out_end_trans; |
835 | 835 | ||
836 | do_qc(qd, -qd->qd_change_sync); | 836 | do_qc(qd, -qd->qd_change_sync); |
837 | set_bit(QDF_REFRESH, &qd->qd_flags); | ||
837 | } | 838 | } |
838 | 839 | ||
839 | error = 0; | 840 | error = 0; |
@@ -929,6 +930,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) | |||
929 | { | 930 | { |
930 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 931 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
931 | struct gfs2_alloc *al = ip->i_alloc; | 932 | struct gfs2_alloc *al = ip->i_alloc; |
933 | struct gfs2_quota_data *qd; | ||
932 | unsigned int x; | 934 | unsigned int x; |
933 | int error = 0; | 935 | int error = 0; |
934 | 936 | ||
@@ -942,7 +944,11 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid) | |||
942 | sort_qd, NULL); | 944 | sort_qd, NULL); |
943 | 945 | ||
944 | for (x = 0; x < al->al_qd_num; x++) { | 946 | for (x = 0; x < al->al_qd_num; x++) { |
945 | error = do_glock(al->al_qd[x], NO_FORCE, &al->al_qd_ghs[x]); | 947 | int force = NO_FORCE; |
948 | qd = al->al_qd[x]; | ||
949 | if (test_and_clear_bit(QDF_REFRESH, &qd->qd_flags)) | ||
950 | force = FORCE; | ||
951 | error = do_glock(qd, force, &al->al_qd_ghs[x]); | ||
946 | if (error) | 952 | if (error) |
947 | break; | 953 | break; |
948 | } | 954 | } |
@@ -1587,6 +1593,8 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
1587 | 1593 | ||
1588 | offset = qd2offset(qd); | 1594 | offset = qd2offset(qd); |
1589 | alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota)); | 1595 | alloc_required = gfs2_write_alloc_required(ip, offset, sizeof(struct gfs2_quota)); |
1596 | if (gfs2_is_stuffed(ip)) | ||
1597 | alloc_required = 1; | ||
1590 | if (alloc_required) { | 1598 | if (alloc_required) { |
1591 | al = gfs2_alloc_get(ip); | 1599 | al = gfs2_alloc_get(ip); |
1592 | if (al == NULL) | 1600 | if (al == NULL) |
@@ -1600,7 +1608,9 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
1600 | blocks += gfs2_rg_blocks(al); | 1608 | blocks += gfs2_rg_blocks(al); |
1601 | } | 1609 | } |
1602 | 1610 | ||
1603 | error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); | 1611 | /* Some quotas span block boundaries and can update two blocks, |
1612 | adding an extra block to the transaction to handle such quotas */ | ||
1613 | error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 2, 0); | ||
1604 | if (error) | 1614 | if (error) |
1605 | goto out_release; | 1615 | goto out_release; |
1606 | 1616 | ||
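In the quota.c hunks, do_sync() tags each quota it has just written back with QDF_REFRESH, and gfs2_quota_lock() clears that bit to force a re-read of the quota the next time it is locked. A small sketch of the set-then-force-refresh pattern; the flag and structure names are illustrative.

/* Sketch: mark an entry stale after a sync so the next lock forces a refresh.
 * Flag and field names are illustrative, not the GFS2 quota structures. */
#include <stdbool.h>
#include <stdio.h>

struct quota { bool needs_refresh; int cached_value; };

static void sync_quota(struct quota *q)
{
    /* write the change back... then force the cache to be re-read next time */
    q->needs_refresh = true;
}

static void lock_quota(struct quota *q)
{
    bool force = false;

    if (q->needs_refresh) {           /* test_and_clear_bit(QDF_REFRESH, ...) analogue */
        q->needs_refresh = false;
        force = true;
    }
    if (force)
        q->cached_value = 42;         /* stands in for re-reading from disk */
}

int main(void)
{
    struct quota q = { false, 0 };

    sync_quota(&q);
    lock_quota(&q);
    printf("refreshed value: %d\n", q.cached_value);
    return 0;
}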
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 7293ea27020..cf930cd9664 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -1602,7 +1602,7 @@ rgrp_error: | |||
1602 | * | 1602 | * |
1603 | */ | 1603 | */ |
1604 | 1604 | ||
1605 | void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) | 1605 | void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) |
1606 | { | 1606 | { |
1607 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1607 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1608 | struct gfs2_rgrpd *rgd; | 1608 | struct gfs2_rgrpd *rgd; |
@@ -1617,7 +1617,21 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
1617 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 1617 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1618 | 1618 | ||
1619 | gfs2_trans_add_rg(rgd); | 1619 | gfs2_trans_add_rg(rgd); |
1620 | } | ||
1620 | 1621 | ||
1622 | /** | ||
1623 | * gfs2_free_data - free a contiguous run of data block(s) | ||
1624 | * @ip: the inode these blocks are being freed from | ||
1625 | * @bstart: first block of a run of contiguous blocks | ||
1626 | * @blen: the length of the block run | ||
1627 | * | ||
1628 | */ | ||
1629 | |||
1630 | void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) | ||
1631 | { | ||
1632 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1633 | |||
1634 | __gfs2_free_data(ip, bstart, blen); | ||
1621 | gfs2_statfs_change(sdp, 0, +blen, 0); | 1635 | gfs2_statfs_change(sdp, 0, +blen, 0); |
1622 | gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); | 1636 | gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); |
1623 | } | 1637 | } |
@@ -1630,7 +1644,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
1630 | * | 1644 | * |
1631 | */ | 1645 | */ |
1632 | 1646 | ||
1633 | void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) | 1647 | void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) |
1634 | { | 1648 | { |
1635 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1649 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1636 | struct gfs2_rgrpd *rgd; | 1650 | struct gfs2_rgrpd *rgd; |
@@ -1645,10 +1659,24 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) | |||
1645 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); | 1659 | gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); |
1646 | 1660 | ||
1647 | gfs2_trans_add_rg(rgd); | 1661 | gfs2_trans_add_rg(rgd); |
1662 | gfs2_meta_wipe(ip, bstart, blen); | ||
1663 | } | ||
1648 | 1664 | ||
1665 | /** | ||
1666 | * gfs2_free_meta - free a contiguous run of metadata block(s) | ||
1667 | * @ip: the inode these blocks are being freed from | ||
1668 | * @bstart: first block of a run of contiguous blocks | ||
1669 | * @blen: the length of the block run | ||
1670 | * | ||
1671 | */ | ||
1672 | |||
1673 | void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen) | ||
1674 | { | ||
1675 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1676 | |||
1677 | __gfs2_free_meta(ip, bstart, blen); | ||
1649 | gfs2_statfs_change(sdp, 0, +blen, 0); | 1678 | gfs2_statfs_change(sdp, 0, +blen, 0); |
1650 | gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); | 1679 | gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); |
1651 | gfs2_meta_wipe(ip, bstart, blen); | ||
1652 | } | 1680 | } |
1653 | 1681 | ||
1654 | void gfs2_unlink_di(struct inode *inode) | 1682 | void gfs2_unlink_di(struct inode *inode) |
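The rgrp.c hunks split gfs2_free_data()/gfs2_free_meta() into a __-prefixed core that only updates the resource group, plus a wrapper that also adjusts the statfs and quota accounting, so callers that manage accounting themselves can use the core directly. A minimal sketch of the wrapper split; the accounting calls are placeholders.

/* Sketch: split a "core" operation from a wrapper that adds accounting.
 * Function and field names are illustrative placeholders. */
#include <stdio.h>

struct fs { long free_blocks; long quota_used; };

static void __free_blocks(struct fs *fs, long start, long len)
{
    (void)start;
    fs->free_blocks += len;           /* bitmap/resource-group update only */
}

static void free_blocks(struct fs *fs, long start, long len)
{
    __free_blocks(fs, start, len);
    fs->quota_used -= len;            /* statfs/quota accounting in the wrapper */
}

int main(void)
{
    struct fs fs = { 100, 50 };

    __free_blocks(&fs, 0, 5);         /* caller does its own accounting */
    free_blocks(&fs, 5, 5);           /* normal path: accounting included */
    printf("free=%ld quota=%ld\n", fs.free_blocks, fs.quota_used);
    return 0;
}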
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 50c2bb04369..a80e3034ac4 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -52,7 +52,9 @@ extern int gfs2_ri_update(struct gfs2_inode *ip); | |||
52 | extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); | 52 | extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); |
53 | extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); | 53 | extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); |
54 | 54 | ||
55 | extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); | ||
55 | extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); | 56 | extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); |
57 | extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); | ||
56 | extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); | 58 | extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); |
57 | extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); | 59 | extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); |
58 | extern void gfs2_unlink_di(struct inode *inode); | 60 | extern void gfs2_unlink_di(struct inode *inode); |
diff --git a/fs/inode.c b/fs/inode.c index 0647d80accf..9910c039f02 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -84,16 +84,13 @@ static struct hlist_head *inode_hashtable __read_mostly; | |||
84 | DEFINE_SPINLOCK(inode_lock); | 84 | DEFINE_SPINLOCK(inode_lock); |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * iprune_sem provides exclusion between the kswapd or try_to_free_pages | 87 | * iprune_sem provides exclusion between the icache shrinking and the |
88 | * icache shrinking path, and the umount path. Without this exclusion, | 88 | * umount path. |
89 | * by the time prune_icache calls iput for the inode whose pages it has | ||
90 | * been invalidating, or by the time it calls clear_inode & destroy_inode | ||
91 | * from its final dispose_list, the struct super_block they refer to | ||
92 | * (for inode->i_sb->s_op) may already have been freed and reused. | ||
93 | * | 89 | * |
94 | * We make this an rwsem because the fastpath is icache shrinking. In | 90 | * We don't actually need it to protect anything in the umount path, |
95 | * some cases a filesystem may be doing a significant amount of work in | 91 | * but only need to cycle through it to make sure any inode that |
96 | * its inode reclaim code, so this should improve parallelism. | 92 | * prune_icache took off the LRU list has been fully torn down by the |
93 | * time we are past evict_inodes. | ||
97 | */ | 94 | */ |
98 | static DECLARE_RWSEM(iprune_sem); | 95 | static DECLARE_RWSEM(iprune_sem); |
99 | 96 | ||
@@ -516,17 +513,12 @@ void evict_inodes(struct super_block *sb) | |||
516 | struct inode *inode, *next; | 513 | struct inode *inode, *next; |
517 | LIST_HEAD(dispose); | 514 | LIST_HEAD(dispose); |
518 | 515 | ||
519 | down_write(&iprune_sem); | ||
520 | |||
521 | spin_lock(&inode_lock); | 516 | spin_lock(&inode_lock); |
522 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { | 517 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { |
523 | if (atomic_read(&inode->i_count)) | 518 | if (atomic_read(&inode->i_count)) |
524 | continue; | 519 | continue; |
525 | 520 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) | |
526 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { | ||
527 | WARN_ON(1); | ||
528 | continue; | 521 | continue; |
529 | } | ||
530 | 522 | ||
531 | inode->i_state |= I_FREEING; | 523 | inode->i_state |= I_FREEING; |
532 | 524 | ||
@@ -542,6 +534,13 @@ void evict_inodes(struct super_block *sb) | |||
542 | spin_unlock(&inode_lock); | 534 | spin_unlock(&inode_lock); |
543 | 535 | ||
544 | dispose_list(&dispose); | 536 | dispose_list(&dispose); |
537 | |||
538 | /* | ||
539 | * Cycle through iprune_sem to make sure any inode that prune_icache | ||
540 | * moved off the list before we took the lock has been fully torn | ||
541 | * down. | ||
542 | */ | ||
543 | down_write(&iprune_sem); | ||
545 | up_write(&iprune_sem); | 544 | up_write(&iprune_sem); |
546 | } | 545 | } |
547 | 546 | ||
@@ -561,8 +560,6 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) | |||
561 | struct inode *inode, *next; | 560 | struct inode *inode, *next; |
562 | LIST_HEAD(dispose); | 561 | LIST_HEAD(dispose); |
563 | 562 | ||
564 | down_write(&iprune_sem); | ||
565 | |||
566 | spin_lock(&inode_lock); | 563 | spin_lock(&inode_lock); |
567 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { | 564 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { |
568 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) | 565 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) |
@@ -590,7 +587,6 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) | |||
590 | spin_unlock(&inode_lock); | 587 | spin_unlock(&inode_lock); |
591 | 588 | ||
592 | dispose_list(&dispose); | 589 | dispose_list(&dispose); |
593 | up_write(&iprune_sem); | ||
594 | 590 | ||
595 | return busy; | 591 | return busy; |
596 | } | 592 | } |
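In the inode.c hunks, evict_inodes() no longer holds iprune_sem for the whole pass: it disposes its own list and then just cycles through the semaphore (down_write() immediately followed by up_write()), which acts as a barrier ensuring any inode prune_icache took off the LRU under the read side has finished tearing down. The sketch below uses a pthread rwlock as the analogue; it is a sketch of the idea, not the VFS code.

/* Sketch: cycle a rwlock's write side as a barrier against in-flight readers.
 * This mirrors the down_write()/up_write() pair added to evict_inodes(). */
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t iprune_sem = PTHREAD_RWLOCK_INITIALIZER;
static int torn_down;

static void *shrinker(void *arg)
{
    (void)arg;
    pthread_rwlock_rdlock(&iprune_sem);   /* prune_icache holds the read side */
    torn_down = 1;                        /* tearing down an inode it took off the LRU */
    pthread_rwlock_unlock(&iprune_sem);
    return NULL;
}

int main(void)
{
    pthread_t t;

    pthread_create(&t, NULL, shrinker, NULL);

    /* evict_inodes(): cycle through the write side; once we get it, every
     * shrinker that was already inside has finished its teardown */
    pthread_rwlock_wrlock(&iprune_sem);
    pthread_rwlock_unlock(&iprune_sem);

    printf("any shrinker that got in first is done: %d\n", torn_down);
    pthread_join(t, NULL);
    return 0;
}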
diff --git a/fs/internal.h b/fs/internal.h index 9b976b57d7f..f3d15de44b1 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -106,6 +106,19 @@ extern void put_super(struct super_block *sb); | |||
106 | struct nameidata; | 106 | struct nameidata; |
107 | extern struct file *nameidata_to_filp(struct nameidata *); | 107 | extern struct file *nameidata_to_filp(struct nameidata *); |
108 | extern void release_open_intent(struct nameidata *); | 108 | extern void release_open_intent(struct nameidata *); |
109 | struct open_flags { | ||
110 | int open_flag; | ||
111 | int mode; | ||
112 | int acc_mode; | ||
113 | int intent; | ||
114 | }; | ||
115 | extern struct file *do_filp_open(int dfd, const char *pathname, | ||
116 | const struct open_flags *op, int lookup_flags); | ||
117 | extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, | ||
118 | const char *, const struct open_flags *, int lookup_flags); | ||
119 | |||
120 | extern long do_handle_open(int mountdirfd, | ||
121 | struct file_handle __user *ufh, int open_flag); | ||
109 | 122 | ||
110 | /* | 123 | /* |
111 | * inode.c | 124 | * inode.c |
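The new struct open_flags in internal.h bundles the open flag, mode, access mode and intent so do_filp_open() and the new do_file_open_root() take one descriptor instead of a growing argument list. Below is a hedged sketch of the same parameter-object idea in plain C against the user-space open(2) call; the struct contents and the opener function are illustrative, not the VFS plumbing.

/* Sketch: pass an "open descriptor" struct instead of many scalar args.
 * The struct and helper below are illustrative, not the kernel internals. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

struct open_flags_sketch {
    int open_flag;   /* O_RDONLY, O_CREAT, ... */
    int mode;        /* permission bits for O_CREAT */
};

static int open_with(const char *path, const struct open_flags_sketch *op)
{
    return open(path, op->open_flag, op->mode);
}

int main(void)
{
    const struct open_flags_sketch op = { O_RDONLY, 0 };
    int fd = open_with("/etc/hostname", &op);

    if (fd >= 0) {
        printf("opened fd %d\n", fd);
        close(fd);
    }
    return 0;
}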
diff --git a/fs/isofs/export.c b/fs/isofs/export.c index ed752cb3847..dd4687ff30d 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c | |||
@@ -124,9 +124,13 @@ isofs_export_encode_fh(struct dentry *dentry, | |||
124 | * offset of the inode and the upper 16 bits of fh32[1] to | 124 | * offset of the inode and the upper 16 bits of fh32[1] to |
125 | * hold the offset of the parent. | 125 | * hold the offset of the parent. |
126 | */ | 126 | */ |
127 | 127 | if (connectable && (len < 5)) { | |
128 | if (len < 3 || (connectable && len < 5)) | 128 | *max_len = 5; |
129 | return 255; | ||
130 | } else if (len < 3) { | ||
131 | *max_len = 3; | ||
129 | return 255; | 132 | return 255; |
133 | } | ||
130 | 134 | ||
131 | len = 3; | 135 | len = 3; |
132 | fh32[0] = ei->i_iget5_block; | 136 | fh32[0] = ei->i_iget5_block; |
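The isofs export fix above reports the required handle size back through *max_len before returning 255, rather than failing with no hint: 5 words are needed for a connectable handle, 3 otherwise. A small sketch of that "tell the caller how big the buffer must be" convention; the 3/5 word sizes mirror the hunk, everything else is illustrative.

/* Sketch: on a too-small buffer, write back the required length and fail.
 * The 3/5-word sizes mirror the isofs hunk; the rest is illustrative. */
#include <stdio.h>

static int encode_fh(unsigned int *fh, int *max_len, int connectable)
{
    int need = connectable ? 5 : 3;

    if (*max_len < need) {
        *max_len = need;     /* let the caller retry with a big enough buffer */
        return 255;
    }
    fh[0] = 0xc0ffee;        /* ...fill in the real handle words here... */
    *max_len = need;
    return connectable ? 2 : 1;
}

int main(void)
{
    unsigned int fh[5];
    int len = 2;

    if (encode_fh(fh, &len, 1) == 255)
        printf("need %d words, retrying\n", len);
    len = 5;
    printf("type %d, len %d\n", encode_fh(fh, &len, 1), len);
    return 0;
}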
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index c61600ece4f..eaaf2b511e8 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -809,9 +809,6 @@ static int jfs_link(struct dentry *old_dentry, | |||
809 | if (ip->i_nlink == JFS_LINK_MAX) | 809 | if (ip->i_nlink == JFS_LINK_MAX) |
810 | return -EMLINK; | 810 | return -EMLINK; |
811 | 811 | ||
812 | if (ip->i_nlink == 0) | ||
813 | return -ENOENT; | ||
814 | |||
815 | dquot_initialize(dir); | 812 | dquot_initialize(dir); |
816 | 813 | ||
817 | tid = txBegin(ip->i_sb, 0); | 814 | tid = txBegin(ip->i_sb, 0); |
diff --git a/fs/namei.c b/fs/namei.c index a4689eb2df2..b912b7abe74 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -136,7 +136,7 @@ static int do_getname(const char __user *filename, char *page) | |||
136 | return retval; | 136 | return retval; |
137 | } | 137 | } |
138 | 138 | ||
139 | char * getname(const char __user * filename) | 139 | static char *getname_flags(const char __user * filename, int flags) |
140 | { | 140 | { |
141 | char *tmp, *result; | 141 | char *tmp, *result; |
142 | 142 | ||
@@ -147,14 +147,21 @@ char * getname(const char __user * filename) | |||
147 | 147 | ||
148 | result = tmp; | 148 | result = tmp; |
149 | if (retval < 0) { | 149 | if (retval < 0) { |
150 | __putname(tmp); | 150 | if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) { |
151 | result = ERR_PTR(retval); | 151 | __putname(tmp); |
152 | result = ERR_PTR(retval); | ||
153 | } | ||
152 | } | 154 | } |
153 | } | 155 | } |
154 | audit_getname(result); | 156 | audit_getname(result); |
155 | return result; | 157 | return result; |
156 | } | 158 | } |
157 | 159 | ||
160 | char *getname(const char __user * filename) | ||
161 | { | ||
162 | return getname_flags(filename, 0); | ||
163 | } | ||
164 | |||
158 | #ifdef CONFIG_AUDITSYSCALL | 165 | #ifdef CONFIG_AUDITSYSCALL |
159 | void putname(const char *name) | 166 | void putname(const char *name) |
160 | { | 167 | { |
@@ -401,9 +408,11 @@ static int nameidata_drop_rcu(struct nameidata *nd) | |||
401 | { | 408 | { |
402 | struct fs_struct *fs = current->fs; | 409 | struct fs_struct *fs = current->fs; |
403 | struct dentry *dentry = nd->path.dentry; | 410 | struct dentry *dentry = nd->path.dentry; |
411 | int want_root = 0; | ||
404 | 412 | ||
405 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | 413 | BUG_ON(!(nd->flags & LOOKUP_RCU)); |
406 | if (nd->root.mnt) { | 414 | if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { |
415 | want_root = 1; | ||
407 | spin_lock(&fs->lock); | 416 | spin_lock(&fs->lock); |
408 | if (nd->root.mnt != fs->root.mnt || | 417 | if (nd->root.mnt != fs->root.mnt || |
409 | nd->root.dentry != fs->root.dentry) | 418 | nd->root.dentry != fs->root.dentry) |
@@ -414,7 +423,7 @@ static int nameidata_drop_rcu(struct nameidata *nd) | |||
414 | goto err; | 423 | goto err; |
415 | BUG_ON(nd->inode != dentry->d_inode); | 424 | BUG_ON(nd->inode != dentry->d_inode); |
416 | spin_unlock(&dentry->d_lock); | 425 | spin_unlock(&dentry->d_lock); |
417 | if (nd->root.mnt) { | 426 | if (want_root) { |
418 | path_get(&nd->root); | 427 | path_get(&nd->root); |
419 | spin_unlock(&fs->lock); | 428 | spin_unlock(&fs->lock); |
420 | } | 429 | } |
@@ -427,7 +436,7 @@ static int nameidata_drop_rcu(struct nameidata *nd) | |||
427 | err: | 436 | err: |
428 | spin_unlock(&dentry->d_lock); | 437 | spin_unlock(&dentry->d_lock); |
429 | err_root: | 438 | err_root: |
430 | if (nd->root.mnt) | 439 | if (want_root) |
431 | spin_unlock(&fs->lock); | 440 | spin_unlock(&fs->lock); |
432 | return -ECHILD; | 441 | return -ECHILD; |
433 | } | 442 | } |
@@ -454,9 +463,11 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry | |||
454 | { | 463 | { |
455 | struct fs_struct *fs = current->fs; | 464 | struct fs_struct *fs = current->fs; |
456 | struct dentry *parent = nd->path.dentry; | 465 | struct dentry *parent = nd->path.dentry; |
466 | int want_root = 0; | ||
457 | 467 | ||
458 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | 468 | BUG_ON(!(nd->flags & LOOKUP_RCU)); |
459 | if (nd->root.mnt) { | 469 | if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { |
470 | want_root = 1; | ||
460 | spin_lock(&fs->lock); | 471 | spin_lock(&fs->lock); |
461 | if (nd->root.mnt != fs->root.mnt || | 472 | if (nd->root.mnt != fs->root.mnt || |
462 | nd->root.dentry != fs->root.dentry) | 473 | nd->root.dentry != fs->root.dentry) |
@@ -476,7 +487,7 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry | |||
476 | parent->d_count++; | 487 | parent->d_count++; |
477 | spin_unlock(&dentry->d_lock); | 488 | spin_unlock(&dentry->d_lock); |
478 | spin_unlock(&parent->d_lock); | 489 | spin_unlock(&parent->d_lock); |
479 | if (nd->root.mnt) { | 490 | if (want_root) { |
480 | path_get(&nd->root); | 491 | path_get(&nd->root); |
481 | spin_unlock(&fs->lock); | 492 | spin_unlock(&fs->lock); |
482 | } | 493 | } |
@@ -490,7 +501,7 @@ err: | |||
490 | spin_unlock(&dentry->d_lock); | 501 | spin_unlock(&dentry->d_lock); |
491 | spin_unlock(&parent->d_lock); | 502 | spin_unlock(&parent->d_lock); |
492 | err_root: | 503 | err_root: |
493 | if (nd->root.mnt) | 504 | if (want_root) |
494 | spin_unlock(&fs->lock); | 505 | spin_unlock(&fs->lock); |
495 | return -ECHILD; | 506 | return -ECHILD; |
496 | } | 507 | } |
@@ -498,8 +509,16 @@ err_root: | |||
498 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | 509 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ |
499 | static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) | 510 | static inline int nameidata_dentry_drop_rcu_maybe(struct nameidata *nd, struct dentry *dentry) |
500 | { | 511 | { |
501 | if (nd->flags & LOOKUP_RCU) | 512 | if (nd->flags & LOOKUP_RCU) { |
502 | return nameidata_dentry_drop_rcu(nd, dentry); | 513 | if (unlikely(nameidata_dentry_drop_rcu(nd, dentry))) { |
514 | nd->flags &= ~LOOKUP_RCU; | ||
515 | if (!(nd->flags & LOOKUP_ROOT)) | ||
516 | nd->root.mnt = NULL; | ||
517 | rcu_read_unlock(); | ||
518 | br_read_unlock(vfsmount_lock); | ||
519 | return -ECHILD; | ||
520 | } | ||
521 | } | ||
503 | return 0; | 522 | return 0; |
504 | } | 523 | } |
505 | 524 | ||
@@ -518,7 +537,8 @@ static int nameidata_drop_rcu_last(struct nameidata *nd) | |||
518 | 537 | ||
519 | BUG_ON(!(nd->flags & LOOKUP_RCU)); | 538 | BUG_ON(!(nd->flags & LOOKUP_RCU)); |
520 | nd->flags &= ~LOOKUP_RCU; | 539 | nd->flags &= ~LOOKUP_RCU; |
521 | nd->root.mnt = NULL; | 540 | if (!(nd->flags & LOOKUP_ROOT)) |
541 | nd->root.mnt = NULL; | ||
522 | spin_lock(&dentry->d_lock); | 542 | spin_lock(&dentry->d_lock); |
523 | if (!__d_rcu_to_refcount(dentry, nd->seq)) | 543 | if (!__d_rcu_to_refcount(dentry, nd->seq)) |
524 | goto err_unlock; | 544 | goto err_unlock; |
@@ -539,14 +559,6 @@ err_unlock: | |||
539 | return -ECHILD; | 559 | return -ECHILD; |
540 | } | 560 | } |
541 | 561 | ||
542 | /* Try to drop out of rcu-walk mode if we were in it, otherwise do nothing. */ | ||
543 | static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd) | ||
544 | { | ||
545 | if (likely(nd->flags & LOOKUP_RCU)) | ||
546 | return nameidata_drop_rcu_last(nd); | ||
547 | return 0; | ||
548 | } | ||
549 | |||
550 | /** | 562 | /** |
551 | * release_open_intent - free up open intent resources | 563 | * release_open_intent - free up open intent resources |
552 | * @nd: pointer to nameidata | 564 | * @nd: pointer to nameidata |
@@ -590,42 +602,8 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
590 | return dentry; | 602 | return dentry; |
591 | } | 603 | } |
592 | 604 | ||
593 | static inline struct dentry * | ||
594 | do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd) | ||
595 | { | ||
596 | int status = d_revalidate(dentry, nd); | ||
597 | if (likely(status > 0)) | ||
598 | return dentry; | ||
599 | if (status == -ECHILD) { | ||
600 | if (nameidata_dentry_drop_rcu(nd, dentry)) | ||
601 | return ERR_PTR(-ECHILD); | ||
602 | return do_revalidate(dentry, nd); | ||
603 | } | ||
604 | if (status < 0) | ||
605 | return ERR_PTR(status); | ||
606 | /* Don't d_invalidate in rcu-walk mode */ | ||
607 | if (nameidata_dentry_drop_rcu(nd, dentry)) | ||
608 | return ERR_PTR(-ECHILD); | ||
609 | if (!d_invalidate(dentry)) { | ||
610 | dput(dentry); | ||
611 | dentry = NULL; | ||
612 | } | ||
613 | return dentry; | ||
614 | } | ||
615 | |||
616 | static inline int need_reval_dot(struct dentry *dentry) | ||
617 | { | ||
618 | if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) | ||
619 | return 0; | ||
620 | |||
621 | if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) | ||
622 | return 0; | ||
623 | |||
624 | return 1; | ||
625 | } | ||
626 | |||
627 | /* | 605 | /* |
628 | * force_reval_path - force revalidation of a dentry | 606 | * handle_reval_path - force revalidation of a dentry |
629 | * | 607 | * |
630 | * In some situations the path walking code will trust dentries without | 608 | * In some situations the path walking code will trust dentries without |
631 | * revalidating them. This causes problems for filesystems that depend on | 609 | * revalidating them. This causes problems for filesystems that depend on |
@@ -639,27 +617,28 @@ static inline int need_reval_dot(struct dentry *dentry) | |||
639 | * invalidate the dentry. It's up to the caller to handle putting references | 617 | * invalidate the dentry. It's up to the caller to handle putting references |
640 | * to the path if necessary. | 618 | * to the path if necessary. |
641 | */ | 619 | */ |
642 | static int | 620 | static inline int handle_reval_path(struct nameidata *nd) |
643 | force_reval_path(struct path *path, struct nameidata *nd) | ||
644 | { | 621 | { |
622 | struct dentry *dentry = nd->path.dentry; | ||
645 | int status; | 623 | int status; |
646 | struct dentry *dentry = path->dentry; | ||
647 | 624 | ||
648 | /* | 625 | if (likely(!(nd->flags & LOOKUP_JUMPED))) |
649 | * only check on filesystems where it's possible for the dentry to | 626 | return 0; |
650 | * become stale. | 627 | |
651 | */ | 628 | if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE))) |
652 | if (!need_reval_dot(dentry)) | 629 | return 0; |
630 | |||
631 | if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))) | ||
653 | return 0; | 632 | return 0; |
654 | 633 | ||
634 | /* Note: we do not d_invalidate() */ | ||
655 | status = d_revalidate(dentry, nd); | 635 | status = d_revalidate(dentry, nd); |
656 | if (status > 0) | 636 | if (status > 0) |
657 | return 0; | 637 | return 0; |
658 | 638 | ||
659 | if (!status) { | 639 | if (!status) |
660 | d_invalidate(dentry); | ||
661 | status = -ESTALE; | 640 | status = -ESTALE; |
662 | } | 641 | |
663 | return status; | 642 | return status; |
664 | } | 643 | } |
665 | 644 | ||
@@ -728,6 +707,7 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l | |||
728 | path_put(&nd->path); | 707 | path_put(&nd->path); |
729 | nd->path = nd->root; | 708 | nd->path = nd->root; |
730 | path_get(&nd->root); | 709 | path_get(&nd->root); |
710 | nd->flags |= LOOKUP_JUMPED; | ||
731 | } | 711 | } |
732 | nd->inode = nd->path.dentry->d_inode; | 712 | nd->inode = nd->path.dentry->d_inode; |
733 | 713 | ||
@@ -757,19 +737,42 @@ static inline void path_to_nameidata(const struct path *path, | |||
757 | nd->path.dentry = path->dentry; | 737 | nd->path.dentry = path->dentry; |
758 | } | 738 | } |
759 | 739 | ||
740 | static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) | ||
741 | { | ||
742 | struct inode *inode = link->dentry->d_inode; | ||
743 | if (!IS_ERR(cookie) && inode->i_op->put_link) | ||
744 | inode->i_op->put_link(link->dentry, nd, cookie); | ||
745 | path_put(link); | ||
746 | } | ||
747 | |||
760 | static __always_inline int | 748 | static __always_inline int |
761 | __do_follow_link(const struct path *link, struct nameidata *nd, void **p) | 749 | follow_link(struct path *link, struct nameidata *nd, void **p) |
762 | { | 750 | { |
763 | int error; | 751 | int error; |
764 | struct dentry *dentry = link->dentry; | 752 | struct dentry *dentry = link->dentry; |
765 | 753 | ||
766 | BUG_ON(nd->flags & LOOKUP_RCU); | 754 | BUG_ON(nd->flags & LOOKUP_RCU); |
767 | 755 | ||
756 | if (link->mnt == nd->path.mnt) | ||
757 | mntget(link->mnt); | ||
758 | |||
759 | if (unlikely(current->total_link_count >= 40)) { | ||
760 | *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */ | ||
761 | path_put(&nd->path); | ||
762 | return -ELOOP; | ||
763 | } | ||
764 | cond_resched(); | ||
765 | current->total_link_count++; | ||
766 | |||
768 | touch_atime(link->mnt, dentry); | 767 | touch_atime(link->mnt, dentry); |
769 | nd_set_link(nd, NULL); | 768 | nd_set_link(nd, NULL); |
770 | 769 | ||
771 | if (link->mnt == nd->path.mnt) | 770 | error = security_inode_follow_link(link->dentry, nd); |
772 | mntget(link->mnt); | 771 | if (error) { |
772 | *p = ERR_PTR(error); /* no ->put_link(), please */ | ||
773 | path_put(&nd->path); | ||
774 | return error; | ||
775 | } | ||
773 | 776 | ||
774 | nd->last_type = LAST_BIND; | 777 | nd->last_type = LAST_BIND; |
775 | *p = dentry->d_inode->i_op->follow_link(dentry, nd); | 778 | *p = dentry->d_inode->i_op->follow_link(dentry, nd); |
@@ -780,56 +783,18 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p) | |||
780 | if (s) | 783 | if (s) |
781 | error = __vfs_follow_link(nd, s); | 784 | error = __vfs_follow_link(nd, s); |
782 | else if (nd->last_type == LAST_BIND) { | 785 | else if (nd->last_type == LAST_BIND) { |
783 | error = force_reval_path(&nd->path, nd); | 786 | nd->flags |= LOOKUP_JUMPED; |
784 | if (error) | 787 | nd->inode = nd->path.dentry->d_inode; |
788 | if (nd->inode->i_op->follow_link) { | ||
789 | /* stepped on a _really_ weird one */ | ||
785 | path_put(&nd->path); | 790 | path_put(&nd->path); |
791 | error = -ELOOP; | ||
792 | } | ||
786 | } | 793 | } |
787 | } | 794 | } |
788 | return error; | 795 | return error; |
789 | } | 796 | } |
790 | 797 | ||
791 | /* | ||
792 | * This limits recursive symlink follows to 8, while | ||
793 | * limiting consecutive symlinks to 40. | ||
794 | * | ||
795 | * Without that kind of total limit, nasty chains of consecutive | ||
796 | * symlinks can cause almost arbitrarily long lookups. | ||
797 | */ | ||
798 | static inline int do_follow_link(struct inode *inode, struct path *path, struct nameidata *nd) | ||
799 | { | ||
800 | void *cookie; | ||
801 | int err = -ELOOP; | ||
802 | |||
803 | /* We drop rcu-walk here */ | ||
804 | if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry)) | ||
805 | return -ECHILD; | ||
806 | BUG_ON(inode != path->dentry->d_inode); | ||
807 | |||
808 | if (current->link_count >= MAX_NESTED_LINKS) | ||
809 | goto loop; | ||
810 | if (current->total_link_count >= 40) | ||
811 | goto loop; | ||
812 | BUG_ON(nd->depth >= MAX_NESTED_LINKS); | ||
813 | cond_resched(); | ||
814 | err = security_inode_follow_link(path->dentry, nd); | ||
815 | if (err) | ||
816 | goto loop; | ||
817 | current->link_count++; | ||
818 | current->total_link_count++; | ||
819 | nd->depth++; | ||
820 | err = __do_follow_link(path, nd, &cookie); | ||
821 | if (!IS_ERR(cookie) && path->dentry->d_inode->i_op->put_link) | ||
822 | path->dentry->d_inode->i_op->put_link(path->dentry, nd, cookie); | ||
823 | path_put(path); | ||
824 | current->link_count--; | ||
825 | nd->depth--; | ||
826 | return err; | ||
827 | loop: | ||
828 | path_put_conditional(path, nd); | ||
829 | path_put(&nd->path); | ||
830 | return err; | ||
831 | } | ||
832 | |||
833 | static int follow_up_rcu(struct path *path) | 798 | static int follow_up_rcu(struct path *path) |
834 | { | 799 | { |
835 | struct vfsmount *parent; | 800 | struct vfsmount *parent; |
@@ -1068,7 +1033,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
1068 | 1033 | ||
1069 | seq = read_seqcount_begin(&parent->d_seq); | 1034 | seq = read_seqcount_begin(&parent->d_seq); |
1070 | if (read_seqcount_retry(&old->d_seq, nd->seq)) | 1035 | if (read_seqcount_retry(&old->d_seq, nd->seq)) |
1071 | return -ECHILD; | 1036 | goto failed; |
1072 | inode = parent->d_inode; | 1037 | inode = parent->d_inode; |
1073 | nd->path.dentry = parent; | 1038 | nd->path.dentry = parent; |
1074 | nd->seq = seq; | 1039 | nd->seq = seq; |
@@ -1081,8 +1046,15 @@ static int follow_dotdot_rcu(struct nameidata *nd) | |||
1081 | } | 1046 | } |
1082 | __follow_mount_rcu(nd, &nd->path, &inode, true); | 1047 | __follow_mount_rcu(nd, &nd->path, &inode, true); |
1083 | nd->inode = inode; | 1048 | nd->inode = inode; |
1084 | |||
1085 | return 0; | 1049 | return 0; |
1050 | |||
1051 | failed: | ||
1052 | nd->flags &= ~LOOKUP_RCU; | ||
1053 | if (!(nd->flags & LOOKUP_ROOT)) | ||
1054 | nd->root.mnt = NULL; | ||
1055 | rcu_read_unlock(); | ||
1056 | br_read_unlock(vfsmount_lock); | ||
1057 | return -ECHILD; | ||
1086 | } | 1058 | } |
1087 | 1059 | ||
1088 | /* | 1060 | /* |
@@ -1216,68 +1188,85 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, | |||
1216 | { | 1188 | { |
1217 | struct vfsmount *mnt = nd->path.mnt; | 1189 | struct vfsmount *mnt = nd->path.mnt; |
1218 | struct dentry *dentry, *parent = nd->path.dentry; | 1190 | struct dentry *dentry, *parent = nd->path.dentry; |
1219 | struct inode *dir; | 1191 | int need_reval = 1; |
1192 | int status = 1; | ||
1220 | int err; | 1193 | int err; |
1221 | 1194 | ||
1222 | /* | 1195 | /* |
1223 | * See if the low-level filesystem might want | ||
1224 | * to use its own hash.. | ||
1225 | */ | ||
1226 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { | ||
1227 | err = parent->d_op->d_hash(parent, nd->inode, name); | ||
1228 | if (err < 0) | ||
1229 | return err; | ||
1230 | } | ||
1231 | |||
1232 | /* | ||
1233 | * Rename seqlock is not required here because in the off chance | 1196 | * Rename seqlock is not required here because in the off chance |
1234 | * of a false negative due to a concurrent rename, we're going to | 1197 | * of a false negative due to a concurrent rename, we're going to |
1235 | * do the non-racy lookup, below. | 1198 | * do the non-racy lookup, below. |
1236 | */ | 1199 | */ |
1237 | if (nd->flags & LOOKUP_RCU) { | 1200 | if (nd->flags & LOOKUP_RCU) { |
1238 | unsigned seq; | 1201 | unsigned seq; |
1239 | |||
1240 | *inode = nd->inode; | 1202 | *inode = nd->inode; |
1241 | dentry = __d_lookup_rcu(parent, name, &seq, inode); | 1203 | dentry = __d_lookup_rcu(parent, name, &seq, inode); |
1242 | if (!dentry) { | 1204 | if (!dentry) |
1243 | if (nameidata_drop_rcu(nd)) | 1205 | goto unlazy; |
1244 | return -ECHILD; | 1206 | |
1245 | goto need_lookup; | ||
1246 | } | ||
1247 | /* Memory barrier in read_seqcount_begin of child is enough */ | 1207 | /* Memory barrier in read_seqcount_begin of child is enough */ |
1248 | if (__read_seqcount_retry(&parent->d_seq, nd->seq)) | 1208 | if (__read_seqcount_retry(&parent->d_seq, nd->seq)) |
1249 | return -ECHILD; | 1209 | return -ECHILD; |
1250 | |||
1251 | nd->seq = seq; | 1210 | nd->seq = seq; |
1211 | |||
1252 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { | 1212 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { |
1253 | dentry = do_revalidate_rcu(dentry, nd); | 1213 | status = d_revalidate(dentry, nd); |
1254 | if (!dentry) | 1214 | if (unlikely(status <= 0)) { |
1255 | goto need_lookup; | 1215 | if (status != -ECHILD) |
1256 | if (IS_ERR(dentry)) | 1216 | need_reval = 0; |
1257 | goto fail; | 1217 | goto unlazy; |
1258 | if (!(nd->flags & LOOKUP_RCU)) | 1218 | } |
1259 | goto done; | ||
1260 | } | 1219 | } |
1261 | path->mnt = mnt; | 1220 | path->mnt = mnt; |
1262 | path->dentry = dentry; | 1221 | path->dentry = dentry; |
1263 | if (likely(__follow_mount_rcu(nd, path, inode, false))) | 1222 | if (likely(__follow_mount_rcu(nd, path, inode, false))) |
1264 | return 0; | 1223 | return 0; |
1265 | if (nameidata_drop_rcu(nd)) | 1224 | unlazy: |
1266 | return -ECHILD; | 1225 | if (dentry) { |
1267 | /* fallthru */ | 1226 | if (nameidata_dentry_drop_rcu(nd, dentry)) |
1227 | return -ECHILD; | ||
1228 | } else { | ||
1229 | if (nameidata_drop_rcu(nd)) | ||
1230 | return -ECHILD; | ||
1231 | } | ||
1232 | } else { | ||
1233 | dentry = __d_lookup(parent, name); | ||
1268 | } | 1234 | } |
1269 | dentry = __d_lookup(parent, name); | 1235 | |
1270 | if (!dentry) | 1236 | retry: |
1271 | goto need_lookup; | 1237 | if (unlikely(!dentry)) { |
1272 | found: | 1238 | struct inode *dir = parent->d_inode; |
1273 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { | 1239 | BUG_ON(nd->inode != dir); |
1274 | dentry = do_revalidate(dentry, nd); | 1240 | |
1275 | if (!dentry) | 1241 | mutex_lock(&dir->i_mutex); |
1276 | goto need_lookup; | 1242 | dentry = d_lookup(parent, name); |
1277 | if (IS_ERR(dentry)) | 1243 | if (likely(!dentry)) { |
1278 | goto fail; | 1244 | dentry = d_alloc_and_lookup(parent, name, nd); |
1245 | if (IS_ERR(dentry)) { | ||
1246 | mutex_unlock(&dir->i_mutex); | ||
1247 | return PTR_ERR(dentry); | ||
1248 | } | ||
1249 | /* known good */ | ||
1250 | need_reval = 0; | ||
1251 | status = 1; | ||
1252 | } | ||
1253 | mutex_unlock(&dir->i_mutex); | ||
1279 | } | 1254 | } |
1280 | done: | 1255 | if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) |
1256 | status = d_revalidate(dentry, nd); | ||
1257 | if (unlikely(status <= 0)) { | ||
1258 | if (status < 0) { | ||
1259 | dput(dentry); | ||
1260 | return status; | ||
1261 | } | ||
1262 | if (!d_invalidate(dentry)) { | ||
1263 | dput(dentry); | ||
1264 | dentry = NULL; | ||
1265 | need_reval = 1; | ||
1266 | goto retry; | ||
1267 | } | ||
1268 | } | ||
1269 | |||
1281 | path->mnt = mnt; | 1270 | path->mnt = mnt; |
1282 | path->dentry = dentry; | 1271 | path->dentry = dentry; |
1283 | err = follow_managed(path, nd->flags); | 1272 | err = follow_managed(path, nd->flags); |
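The reworked do_lookup() above adds an "unlazy" exit: if the RCU-walk fast path cannot complete (no cached dentry, or d_revalidate() wants to sleep), it converts the walk to ref-walk on the spot and falls through to the normal locked lookup instead of failing the whole walk. A hedged sketch of that fast-path-with-fallback shape; the lookup helpers are stubs, not the dcache internals.

/* Sketch: try a lock-free lookup, convert to the locked path on any snag.
 * The lookup helpers are stubs; names do not match the dcache internals. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static const char *fast_lookup(const char *name)      /* __d_lookup_rcu() stand-in */
{
    return strcmp(name, "cached") == 0 ? name : NULL;
}

static const char *slow_lookup(const char *name)      /* locked d_alloc_and_lookup() stand-in */
{
    return name;
}

static const char *lookup(const char *name, bool rcu_walk)
{
    if (rcu_walk) {
        const char *found = fast_lookup(name);
        if (found)
            return found;
        /* "unlazy": grab references / drop RCU here, then fall through */
    }
    return slow_lookup(name);
}

int main(void)
{
    printf("%s\n", lookup("cached", true));    /* stays on the fast path */
    printf("%s\n", lookup("missing", true));   /* falls back to the slow path */
    return 0;
}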
@@ -1287,39 +1276,113 @@ done: | |||
1287 | } | 1276 | } |
1288 | *inode = path->dentry->d_inode; | 1277 | *inode = path->dentry->d_inode; |
1289 | return 0; | 1278 | return 0; |
1279 | } | ||
1280 | |||
1281 | static inline int may_lookup(struct nameidata *nd) | ||
1282 | { | ||
1283 | if (nd->flags & LOOKUP_RCU) { | ||
1284 | int err = exec_permission(nd->inode, IPERM_FLAG_RCU); | ||
1285 | if (err != -ECHILD) | ||
1286 | return err; | ||
1287 | if (nameidata_drop_rcu(nd)) | ||
1288 | return -ECHILD; | ||
1289 | } | ||
1290 | return exec_permission(nd->inode, 0); | ||
1291 | } | ||
1290 | 1292 | ||
1291 | need_lookup: | 1293 | static inline int handle_dots(struct nameidata *nd, int type) |
1292 | dir = parent->d_inode; | 1294 | { |
1293 | BUG_ON(nd->inode != dir); | 1295 | if (type == LAST_DOTDOT) { |
1296 | if (nd->flags & LOOKUP_RCU) { | ||
1297 | if (follow_dotdot_rcu(nd)) | ||
1298 | return -ECHILD; | ||
1299 | } else | ||
1300 | follow_dotdot(nd); | ||
1301 | } | ||
1302 | return 0; | ||
1303 | } | ||
1294 | 1304 | ||
1295 | mutex_lock(&dir->i_mutex); | 1305 | static void terminate_walk(struct nameidata *nd) |
1296 | /* | 1306 | { |
1297 | * First re-do the cached lookup just in case it was created | 1307 | if (!(nd->flags & LOOKUP_RCU)) { |
1298 | * while we waited for the directory semaphore, or the first | 1308 | path_put(&nd->path); |
1299 | * lookup failed due to an unrelated rename. | 1309 | } else { |
1300 | * | 1310 | nd->flags &= ~LOOKUP_RCU; |
1301 | * This could use version numbering or similar to avoid unnecessary | 1311 | if (!(nd->flags & LOOKUP_ROOT)) |
1302 | * cache lookups, but then we'd have to do the first lookup in the | 1312 | nd->root.mnt = NULL; |
1303 | * non-racy way. However in the common case here, everything should | 1313 | rcu_read_unlock(); |
1304 | * be hot in cache, so would it be a big win? | 1314 | br_read_unlock(vfsmount_lock); |
1305 | */ | ||
1306 | dentry = d_lookup(parent, name); | ||
1307 | if (likely(!dentry)) { | ||
1308 | dentry = d_alloc_and_lookup(parent, name, nd); | ||
1309 | mutex_unlock(&dir->i_mutex); | ||
1310 | if (IS_ERR(dentry)) | ||
1311 | goto fail; | ||
1312 | goto done; | ||
1313 | } | 1315 | } |
1316 | } | ||
1317 | |||
1318 | static inline int walk_component(struct nameidata *nd, struct path *path, | ||
1319 | struct qstr *name, int type, int follow) | ||
1320 | { | ||
1321 | struct inode *inode; | ||
1322 | int err; | ||
1314 | /* | 1323 | /* |
1315 | * Uhhuh! Nasty case: the cache was re-populated while | 1324 | * "." and ".." are special - ".." especially so because it has |
1316 | * we waited on the semaphore. Need to revalidate. | 1325 | * to be able to know about the current root directory and |
1326 | * parent relationships. | ||
1317 | */ | 1327 | */ |
1318 | mutex_unlock(&dir->i_mutex); | 1328 | if (unlikely(type != LAST_NORM)) |
1319 | goto found; | 1329 | return handle_dots(nd, type); |
1330 | err = do_lookup(nd, name, path, &inode); | ||
1331 | if (unlikely(err)) { | ||
1332 | terminate_walk(nd); | ||
1333 | return err; | ||
1334 | } | ||
1335 | if (!inode) { | ||
1336 | path_to_nameidata(path, nd); | ||
1337 | terminate_walk(nd); | ||
1338 | return -ENOENT; | ||
1339 | } | ||
1340 | if (unlikely(inode->i_op->follow_link) && follow) { | ||
1341 | if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry)) | ||
1342 | return -ECHILD; | ||
1343 | BUG_ON(inode != path->dentry->d_inode); | ||
1344 | return 1; | ||
1345 | } | ||
1346 | path_to_nameidata(path, nd); | ||
1347 | nd->inode = inode; | ||
1348 | return 0; | ||
1349 | } | ||
1320 | 1350 | ||
1321 | fail: | 1351 | /* |
1322 | return PTR_ERR(dentry); | 1352 | * This limits recursive symlink follows to 8, while |
1353 | * limiting consecutive symlinks to 40. | ||
1354 | * | ||
1355 | * Without that kind of total limit, nasty chains of consecutive | ||
1356 | * symlinks can cause almost arbitrarily long lookups. | ||
1357 | */ | ||
1358 | static inline int nested_symlink(struct path *path, struct nameidata *nd) | ||
1359 | { | ||
1360 | int res; | ||
1361 | |||
1362 | BUG_ON(nd->depth >= MAX_NESTED_LINKS); | ||
1363 | if (unlikely(current->link_count >= MAX_NESTED_LINKS)) { | ||
1364 | path_put_conditional(path, nd); | ||
1365 | path_put(&nd->path); | ||
1366 | return -ELOOP; | ||
1367 | } | ||
1368 | |||
1369 | nd->depth++; | ||
1370 | current->link_count++; | ||
1371 | |||
1372 | do { | ||
1373 | struct path link = *path; | ||
1374 | void *cookie; | ||
1375 | |||
1376 | res = follow_link(&link, nd, &cookie); | ||
1377 | if (!res) | ||
1378 | res = walk_component(nd, path, &nd->last, | ||
1379 | nd->last_type, LOOKUP_FOLLOW); | ||
1380 | put_link(nd, &link, cookie); | ||
1381 | } while (res > 0); | ||
1382 | |||
1383 | current->link_count--; | ||
1384 | nd->depth--; | ||
1385 | return res; | ||
1323 | } | 1386 | } |
1324 | 1387 | ||
1325 | /* | 1388 | /* |
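nested_symlink() above replaces do_follow_link(): the nesting-depth check is done once, then a do/while loop alternates follow_link() and walk_component() until the component no longer resolves to a symlink, with put_link() releasing each link on the way out. A minimal sketch of that iterate-until-resolved loop; the "filesystem" is faked with a lookup table and all names are illustrative.

/* Sketch: follow a chain of symlinks iteratively with a depth cap.
 * The "filesystem" is a tiny table; names are illustrative only. */
#include <stdio.h>
#include <string.h>

#define MAX_LINKS 8

static const char *readlink_sketch(const char *name)   /* follow_link() stand-in */
{
    if (strcmp(name, "a") == 0) return "b";             /* a -> b -> target */
    if (strcmp(name, "b") == 0) return "target";
    return NULL;                                         /* not a symlink */
}

static const char *resolve(const char *name)
{
    int depth = 0;

    while (readlink_sketch(name)) {                      /* still a symlink? */
        if (++depth > MAX_LINKS)
            return NULL;                                  /* -ELOOP analogue */
        name = readlink_sketch(name);                     /* walk_component() step */
    }
    return name;
}

int main(void)
{
    printf("%s\n", resolve("a"));       /* prints "target" */
    return 0;
}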
@@ -1339,30 +1402,18 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
1339 | while (*name=='/') | 1402 | while (*name=='/') |
1340 | name++; | 1403 | name++; |
1341 | if (!*name) | 1404 | if (!*name) |
1342 | goto return_reval; | 1405 | return 0; |
1343 | |||
1344 | if (nd->depth) | ||
1345 | lookup_flags = LOOKUP_FOLLOW | (nd->flags & LOOKUP_CONTINUE); | ||
1346 | 1406 | ||
1347 | /* At this point we know we have a real path component. */ | 1407 | /* At this point we know we have a real path component. */ |
1348 | for(;;) { | 1408 | for(;;) { |
1349 | struct inode *inode; | ||
1350 | unsigned long hash; | 1409 | unsigned long hash; |
1351 | struct qstr this; | 1410 | struct qstr this; |
1352 | unsigned int c; | 1411 | unsigned int c; |
1412 | int type; | ||
1353 | 1413 | ||
1354 | nd->flags |= LOOKUP_CONTINUE; | 1414 | nd->flags |= LOOKUP_CONTINUE; |
1355 | if (nd->flags & LOOKUP_RCU) { | 1415 | |
1356 | err = exec_permission(nd->inode, IPERM_FLAG_RCU); | 1416 | err = may_lookup(nd); |
1357 | if (err == -ECHILD) { | ||
1358 | if (nameidata_drop_rcu(nd)) | ||
1359 | return -ECHILD; | ||
1360 | goto exec_again; | ||
1361 | } | ||
1362 | } else { | ||
1363 | exec_again: | ||
1364 | err = exec_permission(nd->inode, 0); | ||
1365 | } | ||
1366 | if (err) | 1417 | if (err) |
1367 | break; | 1418 | break; |
1368 | 1419 | ||
@@ -1378,52 +1429,43 @@ exec_again: | |||
1378 | this.len = name - (const char *) this.name; | 1429 | this.len = name - (const char *) this.name; |
1379 | this.hash = end_name_hash(hash); | 1430 | this.hash = end_name_hash(hash); |
1380 | 1431 | ||
1432 | type = LAST_NORM; | ||
1433 | if (this.name[0] == '.') switch (this.len) { | ||
1434 | case 2: | ||
1435 | if (this.name[1] == '.') { | ||
1436 | type = LAST_DOTDOT; | ||
1437 | nd->flags |= LOOKUP_JUMPED; | ||
1438 | } | ||
1439 | break; | ||
1440 | case 1: | ||
1441 | type = LAST_DOT; | ||
1442 | } | ||
1443 | if (likely(type == LAST_NORM)) { | ||
1444 | struct dentry *parent = nd->path.dentry; | ||
1445 | nd->flags &= ~LOOKUP_JUMPED; | ||
1446 | if (unlikely(parent->d_flags & DCACHE_OP_HASH)) { | ||
1447 | err = parent->d_op->d_hash(parent, nd->inode, | ||
1448 | &this); | ||
1449 | if (err < 0) | ||
1450 | break; | ||
1451 | } | ||
1452 | } | ||
1453 | |||
1381 | /* remove trailing slashes? */ | 1454 | /* remove trailing slashes? */ |
1382 | if (!c) | 1455 | if (!c) |
1383 | goto last_component; | 1456 | goto last_component; |
1384 | while (*++name == '/'); | 1457 | while (*++name == '/'); |
1385 | if (!*name) | 1458 | if (!*name) |
1386 | goto last_with_slashes; | 1459 | goto last_component; |
1387 | 1460 | ||
1388 | /* | 1461 | err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW); |
1389 | * "." and ".." are special - ".." especially so because it has | 1462 | if (err < 0) |
1390 | * to be able to know about the current root directory and | 1463 | return err; |
1391 | * parent relationships. | ||
1392 | */ | ||
1393 | if (this.name[0] == '.') switch (this.len) { | ||
1394 | default: | ||
1395 | break; | ||
1396 | case 2: | ||
1397 | if (this.name[1] != '.') | ||
1398 | break; | ||
1399 | if (nd->flags & LOOKUP_RCU) { | ||
1400 | if (follow_dotdot_rcu(nd)) | ||
1401 | return -ECHILD; | ||
1402 | } else | ||
1403 | follow_dotdot(nd); | ||
1404 | /* fallthrough */ | ||
1405 | case 1: | ||
1406 | continue; | ||
1407 | } | ||
1408 | /* This does the actual lookups.. */ | ||
1409 | err = do_lookup(nd, &this, &next, &inode); | ||
1410 | if (err) | ||
1411 | break; | ||
1412 | err = -ENOENT; | ||
1413 | if (!inode) | ||
1414 | goto out_dput; | ||
1415 | 1464 | ||
1416 | if (inode->i_op->follow_link) { | 1465 | if (err) { |
1417 | err = do_follow_link(inode, &next, nd); | 1466 | err = nested_symlink(&next, nd); |
1418 | if (err) | 1467 | if (err) |
1419 | goto return_err; | 1468 | return err; |
1420 | nd->inode = nd->path.dentry->d_inode; | ||
1421 | err = -ENOENT; | ||
1422 | if (!nd->inode) | ||
1423 | break; | ||
1424 | } else { | ||
1425 | path_to_nameidata(&next, nd); | ||
1426 | nd->inode = inode; | ||
1427 | } | 1469 | } |
1428 | err = -ENOTDIR; | 1470 | err = -ENOTDIR; |
1429 | if (!nd->inode->i_op->lookup) | 1471 | if (!nd->inode->i_op->lookup) |
@@ -1431,210 +1473,109 @@ exec_again: | |||
1431 | continue; | 1473 | continue; |
1432 | /* here ends the main loop */ | 1474 | /* here ends the main loop */ |
1433 | 1475 | ||
1434 | last_with_slashes: | ||
1435 | lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; | ||
1436 | last_component: | 1476 | last_component: |
1437 | /* Clear LOOKUP_CONTINUE iff it was previously unset */ | 1477 | /* Clear LOOKUP_CONTINUE iff it was previously unset */ |
1438 | nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; | 1478 | nd->flags &= lookup_flags | ~LOOKUP_CONTINUE; |
1439 | if (lookup_flags & LOOKUP_PARENT) | ||
1440 | goto lookup_parent; | ||
1441 | if (this.name[0] == '.') switch (this.len) { | ||
1442 | default: | ||
1443 | break; | ||
1444 | case 2: | ||
1445 | if (this.name[1] != '.') | ||
1446 | break; | ||
1447 | if (nd->flags & LOOKUP_RCU) { | ||
1448 | if (follow_dotdot_rcu(nd)) | ||
1449 | return -ECHILD; | ||
1450 | } else | ||
1451 | follow_dotdot(nd); | ||
1452 | /* fallthrough */ | ||
1453 | case 1: | ||
1454 | goto return_reval; | ||
1455 | } | ||
1456 | err = do_lookup(nd, &this, &next, &inode); | ||
1457 | if (err) | ||
1458 | break; | ||
1459 | if (inode && unlikely(inode->i_op->follow_link) && | ||
1460 | (lookup_flags & LOOKUP_FOLLOW)) { | ||
1461 | err = do_follow_link(inode, &next, nd); | ||
1462 | if (err) | ||
1463 | goto return_err; | ||
1464 | nd->inode = nd->path.dentry->d_inode; | ||
1465 | } else { | ||
1466 | path_to_nameidata(&next, nd); | ||
1467 | nd->inode = inode; | ||
1468 | } | ||
1469 | err = -ENOENT; | ||
1470 | if (!nd->inode) | ||
1471 | break; | ||
1472 | if (lookup_flags & LOOKUP_DIRECTORY) { | ||
1473 | err = -ENOTDIR; | ||
1474 | if (!nd->inode->i_op->lookup) | ||
1475 | break; | ||
1476 | } | ||
1477 | goto return_base; | ||
1478 | lookup_parent: | ||
1479 | nd->last = this; | 1479 | nd->last = this; |
1480 | nd->last_type = LAST_NORM; | 1480 | nd->last_type = type; |
1481 | if (this.name[0] != '.') | ||
1482 | goto return_base; | ||
1483 | if (this.len == 1) | ||
1484 | nd->last_type = LAST_DOT; | ||
1485 | else if (this.len == 2 && this.name[1] == '.') | ||
1486 | nd->last_type = LAST_DOTDOT; | ||
1487 | else | ||
1488 | goto return_base; | ||
1489 | return_reval: | ||
1490 | /* | ||
1491 | * We bypassed the ordinary revalidation routines. | ||
1492 | * We may need to check the cached dentry for staleness. | ||
1493 | */ | ||
1494 | if (need_reval_dot(nd->path.dentry)) { | ||
1495 | if (nameidata_drop_rcu_last_maybe(nd)) | ||
1496 | return -ECHILD; | ||
1497 | /* Note: we do not d_invalidate() */ | ||
1498 | err = d_revalidate(nd->path.dentry, nd); | ||
1499 | if (!err) | ||
1500 | err = -ESTALE; | ||
1501 | if (err < 0) | ||
1502 | break; | ||
1503 | return 0; | ||
1504 | } | ||
1505 | return_base: | ||
1506 | if (nameidata_drop_rcu_last_maybe(nd)) | ||
1507 | return -ECHILD; | ||
1508 | return 0; | 1481 | return 0; |
1509 | out_dput: | ||
1510 | if (!(nd->flags & LOOKUP_RCU)) | ||
1511 | path_put_conditional(&next, nd); | ||
1512 | break; | ||
1513 | } | 1482 | } |
1514 | if (!(nd->flags & LOOKUP_RCU)) | 1483 | terminate_walk(nd); |
1515 | path_put(&nd->path); | ||
1516 | return_err: | ||
1517 | return err; | 1484 | return err; |
1518 | } | 1485 | } |
1519 | 1486 | ||
1520 | static inline int path_walk_rcu(const char *name, struct nameidata *nd) | 1487 | static int path_init(int dfd, const char *name, unsigned int flags, |
1521 | { | 1488 | struct nameidata *nd, struct file **fp) |
1522 | current->total_link_count = 0; | ||
1523 | |||
1524 | return link_path_walk(name, nd); | ||
1525 | } | ||
1526 | |||
1527 | static inline int path_walk_simple(const char *name, struct nameidata *nd) | ||
1528 | { | ||
1529 | current->total_link_count = 0; | ||
1530 | |||
1531 | return link_path_walk(name, nd); | ||
1532 | } | ||
1533 | |||
1534 | static int path_walk(const char *name, struct nameidata *nd) | ||
1535 | { | ||
1536 | struct path save = nd->path; | ||
1537 | int result; | ||
1538 | |||
1539 | current->total_link_count = 0; | ||
1540 | |||
1541 | /* make sure the stuff we saved doesn't go away */ | ||
1542 | path_get(&save); | ||
1543 | |||
1544 | result = link_path_walk(name, nd); | ||
1545 | if (result == -ESTALE) { | ||
1546 | /* nd->path had been dropped */ | ||
1547 | current->total_link_count = 0; | ||
1548 | nd->path = save; | ||
1549 | nd->inode = save.dentry->d_inode; | ||
1550 | path_get(&nd->path); | ||
1551 | nd->flags |= LOOKUP_REVAL; | ||
1552 | result = link_path_walk(name, nd); | ||
1553 | } | ||
1554 | |||
1555 | path_put(&save); | ||
1556 | |||
1557 | return result; | ||
1558 | } | ||
1559 | |||
1560 | static void path_finish_rcu(struct nameidata *nd) | ||
1561 | { | ||
1562 | if (nd->flags & LOOKUP_RCU) { | ||
1563 | /* RCU dangling. Cancel it. */ | ||
1564 | nd->flags &= ~LOOKUP_RCU; | ||
1565 | nd->root.mnt = NULL; | ||
1566 | rcu_read_unlock(); | ||
1567 | br_read_unlock(vfsmount_lock); | ||
1568 | } | ||
1569 | if (nd->file) | ||
1570 | fput(nd->file); | ||
1571 | } | ||
1572 | |||
1573 | static int path_init_rcu(int dfd, const char *name, unsigned int flags, struct nameidata *nd) | ||
1574 | { | 1489 | { |
1575 | int retval = 0; | 1490 | int retval = 0; |
1576 | int fput_needed; | 1491 | int fput_needed; |
1577 | struct file *file; | 1492 | struct file *file; |
1578 | 1493 | ||
1579 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ | 1494 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ |
1580 | nd->flags = flags | LOOKUP_RCU; | 1495 | nd->flags = flags | LOOKUP_JUMPED; |
1581 | nd->depth = 0; | 1496 | nd->depth = 0; |
1497 | if (flags & LOOKUP_ROOT) { | ||
1498 | struct inode *inode = nd->root.dentry->d_inode; | ||
1499 | if (*name) { | ||
1500 | if (!inode->i_op->lookup) | ||
1501 | return -ENOTDIR; | ||
1502 | retval = inode_permission(inode, MAY_EXEC); | ||
1503 | if (retval) | ||
1504 | return retval; | ||
1505 | } | ||
1506 | nd->path = nd->root; | ||
1507 | nd->inode = inode; | ||
1508 | if (flags & LOOKUP_RCU) { | ||
1509 | br_read_lock(vfsmount_lock); | ||
1510 | rcu_read_lock(); | ||
1511 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1512 | } else { | ||
1513 | path_get(&nd->path); | ||
1514 | } | ||
1515 | return 0; | ||
1516 | } | ||
1517 | |||
1582 | nd->root.mnt = NULL; | 1518 | nd->root.mnt = NULL; |
1583 | nd->file = NULL; | ||
1584 | 1519 | ||
1585 | if (*name=='/') { | 1520 | if (*name=='/') { |
1586 | struct fs_struct *fs = current->fs; | 1521 | if (flags & LOOKUP_RCU) { |
1587 | unsigned seq; | 1522 | br_read_lock(vfsmount_lock); |
1588 | 1523 | rcu_read_lock(); | |
1589 | br_read_lock(vfsmount_lock); | 1524 | set_root_rcu(nd); |
1590 | rcu_read_lock(); | 1525 | } else { |
1591 | 1526 | set_root(nd); | |
1592 | do { | 1527 | path_get(&nd->root); |
1593 | seq = read_seqcount_begin(&fs->seq); | 1528 | } |
1594 | nd->root = fs->root; | 1529 | nd->path = nd->root; |
1595 | nd->path = nd->root; | ||
1596 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1597 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
1598 | |||
1599 | } else if (dfd == AT_FDCWD) { | 1530 | } else if (dfd == AT_FDCWD) { |
1600 | struct fs_struct *fs = current->fs; | 1531 | if (flags & LOOKUP_RCU) { |
1601 | unsigned seq; | 1532 | struct fs_struct *fs = current->fs; |
1602 | 1533 | unsigned seq; | |
1603 | br_read_lock(vfsmount_lock); | ||
1604 | rcu_read_lock(); | ||
1605 | 1534 | ||
1606 | do { | 1535 | br_read_lock(vfsmount_lock); |
1607 | seq = read_seqcount_begin(&fs->seq); | 1536 | rcu_read_lock(); |
1608 | nd->path = fs->pwd; | ||
1609 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1610 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
1611 | 1537 | ||
1538 | do { | ||
1539 | seq = read_seqcount_begin(&fs->seq); | ||
1540 | nd->path = fs->pwd; | ||
1541 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | ||
1542 | } while (read_seqcount_retry(&fs->seq, seq)); | ||
1543 | } else { | ||
1544 | get_fs_pwd(current->fs, &nd->path); | ||
1545 | } | ||
1612 | } else { | 1546 | } else { |
1613 | struct dentry *dentry; | 1547 | struct dentry *dentry; |
1614 | 1548 | ||
1615 | file = fget_light(dfd, &fput_needed); | 1549 | file = fget_raw_light(dfd, &fput_needed); |
1616 | retval = -EBADF; | 1550 | retval = -EBADF; |
1617 | if (!file) | 1551 | if (!file) |
1618 | goto out_fail; | 1552 | goto out_fail; |
1619 | 1553 | ||
1620 | dentry = file->f_path.dentry; | 1554 | dentry = file->f_path.dentry; |
1621 | 1555 | ||
1622 | retval = -ENOTDIR; | 1556 | if (*name) { |
1623 | if (!S_ISDIR(dentry->d_inode->i_mode)) | 1557 | retval = -ENOTDIR; |
1624 | goto fput_fail; | 1558 | if (!S_ISDIR(dentry->d_inode->i_mode)) |
1559 | goto fput_fail; | ||
1625 | 1560 | ||
1626 | retval = file_permission(file, MAY_EXEC); | 1561 | retval = file_permission(file, MAY_EXEC); |
1627 | if (retval) | 1562 | if (retval) |
1628 | goto fput_fail; | 1563 | goto fput_fail; |
1564 | } | ||
1629 | 1565 | ||
1630 | nd->path = file->f_path; | 1566 | nd->path = file->f_path; |
1631 | if (fput_needed) | 1567 | if (flags & LOOKUP_RCU) { |
1632 | nd->file = file; | 1568 | if (fput_needed) |
1633 | 1569 | *fp = file; | |
1634 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); | 1570 | nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); |
1635 | br_read_lock(vfsmount_lock); | 1571 | br_read_lock(vfsmount_lock); |
1636 | rcu_read_lock(); | 1572 | rcu_read_lock(); |
1573 | } else { | ||
1574 | path_get(&file->f_path); | ||
1575 | fput_light(file, fput_needed); | ||
1576 | } | ||
1637 | } | 1577 | } |
1578 | |||
1638 | nd->inode = nd->path.dentry->d_inode; | 1579 | nd->inode = nd->path.dentry->d_inode; |
1639 | return 0; | 1580 | return 0; |
1640 | 1581 | ||
@@ -1644,60 +1585,23 @@ out_fail: | |||
1644 | return retval; | 1585 | return retval; |
1645 | } | 1586 | } |
1646 | 1587 | ||
1647 | static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd) | 1588 | static inline int lookup_last(struct nameidata *nd, struct path *path) |
1648 | { | 1589 | { |
1649 | int retval = 0; | 1590 | if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len]) |
1650 | int fput_needed; | 1591 | nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; |
1651 | struct file *file; | ||
1652 | |||
1653 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ | ||
1654 | nd->flags = flags; | ||
1655 | nd->depth = 0; | ||
1656 | nd->root.mnt = NULL; | ||
1657 | |||
1658 | if (*name=='/') { | ||
1659 | set_root(nd); | ||
1660 | nd->path = nd->root; | ||
1661 | path_get(&nd->root); | ||
1662 | } else if (dfd == AT_FDCWD) { | ||
1663 | get_fs_pwd(current->fs, &nd->path); | ||
1664 | } else { | ||
1665 | struct dentry *dentry; | ||
1666 | |||
1667 | file = fget_light(dfd, &fput_needed); | ||
1668 | retval = -EBADF; | ||
1669 | if (!file) | ||
1670 | goto out_fail; | ||
1671 | |||
1672 | dentry = file->f_path.dentry; | ||
1673 | |||
1674 | retval = -ENOTDIR; | ||
1675 | if (!S_ISDIR(dentry->d_inode->i_mode)) | ||
1676 | goto fput_fail; | ||
1677 | |||
1678 | retval = file_permission(file, MAY_EXEC); | ||
1679 | if (retval) | ||
1680 | goto fput_fail; | ||
1681 | 1592 | ||
1682 | nd->path = file->f_path; | 1593 | nd->flags &= ~LOOKUP_PARENT; |
1683 | path_get(&file->f_path); | 1594 | return walk_component(nd, path, &nd->last, nd->last_type, |
1684 | 1595 | nd->flags & LOOKUP_FOLLOW); | |
1685 | fput_light(file, fput_needed); | ||
1686 | } | ||
1687 | nd->inode = nd->path.dentry->d_inode; | ||
1688 | return 0; | ||
1689 | |||
1690 | fput_fail: | ||
1691 | fput_light(file, fput_needed); | ||
1692 | out_fail: | ||
1693 | return retval; | ||
1694 | } | 1596 | } |
1695 | 1597 | ||
1696 | /* Returns 0 and nd will be valid on success; returns an error otherwise. */ | 1598 | /* Returns 0 and nd will be valid on success; returns an error otherwise. */ |
1697 | static int do_path_lookup(int dfd, const char *name, | 1599 | static int path_lookupat(int dfd, const char *name, |
1698 | unsigned int flags, struct nameidata *nd) | 1600 | unsigned int flags, struct nameidata *nd) |
1699 | { | 1601 | { |
1700 | int retval; | 1602 | struct file *base = NULL; |
1603 | struct path path; | ||
1604 | int err; | ||
1701 | 1605 | ||
1702 | /* | 1606 | /* |
1703 | * Path walking is largely split up into 2 different synchronisation | 1607 | * Path walking is largely split up into 2 different synchronisation |
@@ -1713,44 +1617,75 @@ static int do_path_lookup(int dfd, const char *name, | |||
1713 | * be handled by restarting a traditional ref-walk (which will always | 1617 | * be handled by restarting a traditional ref-walk (which will always |
1714 | * be able to complete). | 1618 | * be able to complete). |
1715 | */ | 1619 | */ |
1716 | retval = path_init_rcu(dfd, name, flags, nd); | 1620 | err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base); |
1717 | if (unlikely(retval)) | 1621 | |
1718 | return retval; | 1622 | if (unlikely(err)) |
1719 | retval = path_walk_rcu(name, nd); | 1623 | return err; |
1720 | path_finish_rcu(nd); | 1624 | |
1721 | if (nd->root.mnt) { | 1625 | current->total_link_count = 0; |
1722 | path_put(&nd->root); | 1626 | err = link_path_walk(name, nd); |
1723 | nd->root.mnt = NULL; | 1627 | |
1628 | if (!err && !(flags & LOOKUP_PARENT)) { | ||
1629 | err = lookup_last(nd, &path); | ||
1630 | while (err > 0) { | ||
1631 | void *cookie; | ||
1632 | struct path link = path; | ||
1633 | nd->flags |= LOOKUP_PARENT; | ||
1634 | err = follow_link(&link, nd, &cookie); | ||
1635 | if (!err) | ||
1636 | err = lookup_last(nd, &path); | ||
1637 | put_link(nd, &link, cookie); | ||
1638 | } | ||
1724 | } | 1639 | } |
1725 | 1640 | ||
1726 | if (unlikely(retval == -ECHILD || retval == -ESTALE)) { | 1641 | if (nd->flags & LOOKUP_RCU) { |
1727 | /* slower, locked walk */ | 1642 | /* went all way through without dropping RCU */ |
1728 | if (retval == -ESTALE) | 1643 | BUG_ON(err); |
1729 | flags |= LOOKUP_REVAL; | 1644 | if (nameidata_drop_rcu_last(nd)) |
1730 | retval = path_init(dfd, name, flags, nd); | 1645 | err = -ECHILD; |
1731 | if (unlikely(retval)) | 1646 | } |
1732 | return retval; | 1647 | |
1733 | retval = path_walk(name, nd); | 1648 | if (!err) |
1734 | if (nd->root.mnt) { | 1649 | err = handle_reval_path(nd); |
1735 | path_put(&nd->root); | 1650 | |
1736 | nd->root.mnt = NULL; | 1651 | if (!err && nd->flags & LOOKUP_DIRECTORY) { |
1652 | if (!nd->inode->i_op->lookup) { | ||
1653 | path_put(&nd->path); | ||
1654 | return -ENOTDIR; | ||
1737 | } | 1655 | } |
1738 | } | 1656 | } |
1739 | 1657 | ||
1658 | if (base) | ||
1659 | fput(base); | ||
1660 | |||
1661 | if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { | ||
1662 | path_put(&nd->root); | ||
1663 | nd->root.mnt = NULL; | ||
1664 | } | ||
1665 | return err; | ||
1666 | } | ||
1667 | |||
1668 | static int do_path_lookup(int dfd, const char *name, | ||
1669 | unsigned int flags, struct nameidata *nd) | ||
1670 | { | ||
1671 | int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); | ||
1672 | if (unlikely(retval == -ECHILD)) | ||
1673 | retval = path_lookupat(dfd, name, flags, nd); | ||
1674 | if (unlikely(retval == -ESTALE)) | ||
1675 | retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); | ||
1676 | |||
1740 | if (likely(!retval)) { | 1677 | if (likely(!retval)) { |
1741 | if (unlikely(!audit_dummy_context())) { | 1678 | if (unlikely(!audit_dummy_context())) { |
1742 | if (nd->path.dentry && nd->inode) | 1679 | if (nd->path.dentry && nd->inode) |
1743 | audit_inode(name, nd->path.dentry); | 1680 | audit_inode(name, nd->path.dentry); |
1744 | } | 1681 | } |
1745 | } | 1682 | } |
1746 | |||
1747 | return retval; | 1683 | return retval; |
1748 | } | 1684 | } |
1749 | 1685 | ||
1750 | int path_lookup(const char *name, unsigned int flags, | 1686 | int kern_path_parent(const char *name, struct nameidata *nd) |
1751 | struct nameidata *nd) | ||
1752 | { | 1687 | { |
1753 | return do_path_lookup(AT_FDCWD, name, flags, nd); | 1688 | return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd); |
1754 | } | 1689 | } |
1755 | 1690 | ||
1756 | int kern_path(const char *name, unsigned int flags, struct path *path) | 1691 | int kern_path(const char *name, unsigned int flags, struct path *path) |
@@ -1774,29 +1709,10 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, | |||
1774 | const char *name, unsigned int flags, | 1709 | const char *name, unsigned int flags, |
1775 | struct nameidata *nd) | 1710 | struct nameidata *nd) |
1776 | { | 1711 | { |
1777 | int retval; | 1712 | nd->root.dentry = dentry; |
1778 | 1713 | nd->root.mnt = mnt; | |
1779 | /* same as do_path_lookup */ | 1714 | /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ |
1780 | nd->last_type = LAST_ROOT; | 1715 | return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd); |
1781 | nd->flags = flags; | ||
1782 | nd->depth = 0; | ||
1783 | |||
1784 | nd->path.dentry = dentry; | ||
1785 | nd->path.mnt = mnt; | ||
1786 | path_get(&nd->path); | ||
1787 | nd->root = nd->path; | ||
1788 | path_get(&nd->root); | ||
1789 | nd->inode = nd->path.dentry->d_inode; | ||
1790 | |||
1791 | retval = path_walk(name, nd); | ||
1792 | if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && | ||
1793 | nd->inode)) | ||
1794 | audit_inode(name, nd->path.dentry); | ||
1795 | |||
1796 | path_put(&nd->root); | ||
1797 | nd->root.mnt = NULL; | ||
1798 | |||
1799 | return retval; | ||
1800 | } | 1716 | } |
1801 | 1717 | ||
1802 | static struct dentry *__lookup_hash(struct qstr *name, | 1718 | static struct dentry *__lookup_hash(struct qstr *name, |
@@ -1811,17 +1727,6 @@ static struct dentry *__lookup_hash(struct qstr *name, | |||
1811 | return ERR_PTR(err); | 1727 | return ERR_PTR(err); |
1812 | 1728 | ||
1813 | /* | 1729 | /* |
1814 | * See if the low-level filesystem might want | ||
1815 | * to use its own hash.. | ||
1816 | */ | ||
1817 | if (base->d_flags & DCACHE_OP_HASH) { | ||
1818 | err = base->d_op->d_hash(base, inode, name); | ||
1819 | dentry = ERR_PTR(err); | ||
1820 | if (err < 0) | ||
1821 | goto out; | ||
1822 | } | ||
1823 | |||
1824 | /* | ||
1825 | * Don't bother with __d_lookup: callers are for creat as | 1730 | * Don't bother with __d_lookup: callers are for creat as |
1826 | * well as unlink, so a lot of the time it would cost | 1731 | * well as unlink, so a lot of the time it would cost |
1827 | * a double lookup. | 1732 | * a double lookup. |
@@ -1833,7 +1738,7 @@ static struct dentry *__lookup_hash(struct qstr *name, | |||
1833 | 1738 | ||
1834 | if (!dentry) | 1739 | if (!dentry) |
1835 | dentry = d_alloc_and_lookup(base, name, nd); | 1740 | dentry = d_alloc_and_lookup(base, name, nd); |
1836 | out: | 1741 | |
1837 | return dentry; | 1742 | return dentry; |
1838 | } | 1743 | } |
1839 | 1744 | ||
@@ -1847,28 +1752,6 @@ static struct dentry *lookup_hash(struct nameidata *nd) | |||
1847 | return __lookup_hash(&nd->last, nd->path.dentry, nd); | 1752 | return __lookup_hash(&nd->last, nd->path.dentry, nd); |
1848 | } | 1753 | } |
1849 | 1754 | ||
1850 | static int __lookup_one_len(const char *name, struct qstr *this, | ||
1851 | struct dentry *base, int len) | ||
1852 | { | ||
1853 | unsigned long hash; | ||
1854 | unsigned int c; | ||
1855 | |||
1856 | this->name = name; | ||
1857 | this->len = len; | ||
1858 | if (!len) | ||
1859 | return -EACCES; | ||
1860 | |||
1861 | hash = init_name_hash(); | ||
1862 | while (len--) { | ||
1863 | c = *(const unsigned char *)name++; | ||
1864 | if (c == '/' || c == '\0') | ||
1865 | return -EACCES; | ||
1866 | hash = partial_name_hash(c, hash); | ||
1867 | } | ||
1868 | this->hash = end_name_hash(hash); | ||
1869 | return 0; | ||
1870 | } | ||
1871 | |||
1872 | /** | 1755 | /** |
1873 | * lookup_one_len - filesystem helper to lookup single pathname component | 1756 | * lookup_one_len - filesystem helper to lookup single pathname component |
1874 | * @name: pathname component to lookup | 1757 | * @name: pathname component to lookup |
@@ -1882,14 +1765,34 @@ static int __lookup_one_len(const char *name, struct qstr *this, | |||
1882 | */ | 1765 | */ |
1883 | struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) | 1766 | struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) |
1884 | { | 1767 | { |
1885 | int err; | ||
1886 | struct qstr this; | 1768 | struct qstr this; |
1769 | unsigned long hash; | ||
1770 | unsigned int c; | ||
1887 | 1771 | ||
1888 | WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); | 1772 | WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex)); |
1889 | 1773 | ||
1890 | err = __lookup_one_len(name, &this, base, len); | 1774 | this.name = name; |
1891 | if (err) | 1775 | this.len = len; |
1892 | return ERR_PTR(err); | 1776 | if (!len) |
1777 | return ERR_PTR(-EACCES); | ||
1778 | |||
1779 | hash = init_name_hash(); | ||
1780 | while (len--) { | ||
1781 | c = *(const unsigned char *)name++; | ||
1782 | if (c == '/' || c == '\0') | ||
1783 | return ERR_PTR(-EACCES); | ||
1784 | hash = partial_name_hash(c, hash); | ||
1785 | } | ||
1786 | this.hash = end_name_hash(hash); | ||
1787 | /* | ||
1788 | * See if the low-level filesystem might want | ||
1789 | * to use its own hash.. | ||
1790 | */ | ||
1791 | if (base->d_flags & DCACHE_OP_HASH) { | ||
1792 | int err = base->d_op->d_hash(base, base->d_inode, &this); | ||
1793 | if (err < 0) | ||
1794 | return ERR_PTR(err); | ||
1795 | } | ||
1893 | 1796 | ||
1894 | return __lookup_hash(&this, base, NULL); | 1797 | return __lookup_hash(&this, base, NULL); |
1895 | } | 1798 | } |
@@ -1898,7 +1801,7 @@ int user_path_at(int dfd, const char __user *name, unsigned flags, | |||
1898 | struct path *path) | 1801 | struct path *path) |
1899 | { | 1802 | { |
1900 | struct nameidata nd; | 1803 | struct nameidata nd; |
1901 | char *tmp = getname(name); | 1804 | char *tmp = getname_flags(name, flags); |
1902 | int err = PTR_ERR(tmp); | 1805 | int err = PTR_ERR(tmp); |
1903 | if (!IS_ERR(tmp)) { | 1806 | if (!IS_ERR(tmp)) { |
1904 | 1807 | ||
@@ -2078,12 +1981,16 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
2078 | return error; | 1981 | return error; |
2079 | } | 1982 | } |
2080 | 1983 | ||
2081 | int may_open(struct path *path, int acc_mode, int flag) | 1984 | static int may_open(struct path *path, int acc_mode, int flag) |
2082 | { | 1985 | { |
2083 | struct dentry *dentry = path->dentry; | 1986 | struct dentry *dentry = path->dentry; |
2084 | struct inode *inode = dentry->d_inode; | 1987 | struct inode *inode = dentry->d_inode; |
2085 | int error; | 1988 | int error; |
2086 | 1989 | ||
1990 | /* O_PATH? */ | ||
1991 | if (!acc_mode) | ||
1992 | return 0; | ||
1993 | |||
2087 | if (!inode) | 1994 | if (!inode) |
2088 | return -ENOENT; | 1995 | return -ENOENT; |
2089 | 1996 | ||
@@ -2152,34 +2059,6 @@ static int handle_truncate(struct file *filp) | |||
2152 | } | 2059 | } |
2153 | 2060 | ||
2154 | /* | 2061 | /* |
2155 | * Be careful about ever adding any more callers of this | ||
2156 | * function. Its flags must be in the namei format, not | ||
2157 | * what gets passed to sys_open(). | ||
2158 | */ | ||
2159 | static int __open_namei_create(struct nameidata *nd, struct path *path, | ||
2160 | int open_flag, int mode) | ||
2161 | { | ||
2162 | int error; | ||
2163 | struct dentry *dir = nd->path.dentry; | ||
2164 | |||
2165 | if (!IS_POSIXACL(dir->d_inode)) | ||
2166 | mode &= ~current_umask(); | ||
2167 | error = security_path_mknod(&nd->path, path->dentry, mode, 0); | ||
2168 | if (error) | ||
2169 | goto out_unlock; | ||
2170 | error = vfs_create(dir->d_inode, path->dentry, mode, nd); | ||
2171 | out_unlock: | ||
2172 | mutex_unlock(&dir->d_inode->i_mutex); | ||
2173 | dput(nd->path.dentry); | ||
2174 | nd->path.dentry = path->dentry; | ||
2175 | |||
2176 | if (error) | ||
2177 | return error; | ||
2178 | /* Don't check for write permission, don't truncate */ | ||
2179 | return may_open(&nd->path, 0, open_flag & ~O_TRUNC); | ||
2180 | } | ||
2181 | |||
2182 | /* | ||
2183 | * Note that while the flag value (low two bits) for sys_open means: | 2062 | * Note that while the flag value (low two bits) for sys_open means: |
2184 | * 00 - read-only | 2063 | * 00 - read-only |
2185 | * 01 - write-only | 2064 | * 01 - write-only |
@@ -2203,126 +2082,115 @@ static inline int open_to_namei_flags(int flag) | |||
2203 | return flag; | 2082 | return flag; |
2204 | } | 2083 | } |
2205 | 2084 | ||
2206 | static int open_will_truncate(int flag, struct inode *inode) | ||
2207 | { | ||
2208 | /* | ||
2209 | * We'll never write to the fs underlying | ||
2210 | * a device file. | ||
2211 | */ | ||
2212 | if (special_file(inode->i_mode)) | ||
2213 | return 0; | ||
2214 | return (flag & O_TRUNC); | ||
2215 | } | ||
2216 | |||
2217 | static struct file *finish_open(struct nameidata *nd, | ||
2218 | int open_flag, int acc_mode) | ||
2219 | { | ||
2220 | struct file *filp; | ||
2221 | int will_truncate; | ||
2222 | int error; | ||
2223 | |||
2224 | will_truncate = open_will_truncate(open_flag, nd->path.dentry->d_inode); | ||
2225 | if (will_truncate) { | ||
2226 | error = mnt_want_write(nd->path.mnt); | ||
2227 | if (error) | ||
2228 | goto exit; | ||
2229 | } | ||
2230 | error = may_open(&nd->path, acc_mode, open_flag); | ||
2231 | if (error) { | ||
2232 | if (will_truncate) | ||
2233 | mnt_drop_write(nd->path.mnt); | ||
2234 | goto exit; | ||
2235 | } | ||
2236 | filp = nameidata_to_filp(nd); | ||
2237 | if (!IS_ERR(filp)) { | ||
2238 | error = ima_file_check(filp, acc_mode); | ||
2239 | if (error) { | ||
2240 | fput(filp); | ||
2241 | filp = ERR_PTR(error); | ||
2242 | } | ||
2243 | } | ||
2244 | if (!IS_ERR(filp)) { | ||
2245 | if (will_truncate) { | ||
2246 | error = handle_truncate(filp); | ||
2247 | if (error) { | ||
2248 | fput(filp); | ||
2249 | filp = ERR_PTR(error); | ||
2250 | } | ||
2251 | } | ||
2252 | } | ||
2253 | /* | ||
2254 | * It is now safe to drop the mnt write | ||
2255 | * because the filp has had a write taken | ||
2256 | * on its behalf. | ||
2257 | */ | ||
2258 | if (will_truncate) | ||
2259 | mnt_drop_write(nd->path.mnt); | ||
2260 | path_put(&nd->path); | ||
2261 | return filp; | ||
2262 | |||
2263 | exit: | ||
2264 | path_put(&nd->path); | ||
2265 | return ERR_PTR(error); | ||
2266 | } | ||
2267 | |||
2268 | /* | 2085 | /* |
2269 | * Handle O_CREAT case for do_filp_open | 2086 | * Handle the last step of open() |
2270 | */ | 2087 | */ |
2271 | static struct file *do_last(struct nameidata *nd, struct path *path, | 2088 | static struct file *do_last(struct nameidata *nd, struct path *path, |
2272 | int open_flag, int acc_mode, | 2089 | const struct open_flags *op, const char *pathname) |
2273 | int mode, const char *pathname) | ||
2274 | { | 2090 | { |
2275 | struct dentry *dir = nd->path.dentry; | 2091 | struct dentry *dir = nd->path.dentry; |
2092 | struct dentry *dentry; | ||
2093 | int open_flag = op->open_flag; | ||
2094 | int will_truncate = open_flag & O_TRUNC; | ||
2095 | int want_write = 0; | ||
2096 | int acc_mode = op->acc_mode; | ||
2276 | struct file *filp; | 2097 | struct file *filp; |
2277 | int error = -EISDIR; | 2098 | int error; |
2099 | |||
2100 | nd->flags &= ~LOOKUP_PARENT; | ||
2101 | nd->flags |= op->intent; | ||
2278 | 2102 | ||
2279 | switch (nd->last_type) { | 2103 | switch (nd->last_type) { |
2280 | case LAST_DOTDOT: | 2104 | case LAST_DOTDOT: |
2281 | follow_dotdot(nd); | ||
2282 | dir = nd->path.dentry; | ||
2283 | case LAST_DOT: | 2105 | case LAST_DOT: |
2284 | if (need_reval_dot(dir)) { | 2106 | error = handle_dots(nd, nd->last_type); |
2285 | int status = d_revalidate(nd->path.dentry, nd); | 2107 | if (error) |
2286 | if (!status) | 2108 | return ERR_PTR(error); |
2287 | status = -ESTALE; | ||
2288 | if (status < 0) { | ||
2289 | error = status; | ||
2290 | goto exit; | ||
2291 | } | ||
2292 | } | ||
2293 | /* fallthrough */ | 2109 | /* fallthrough */ |
2294 | case LAST_ROOT: | 2110 | case LAST_ROOT: |
2295 | goto exit; | 2111 | if (nd->flags & LOOKUP_RCU) { |
2112 | if (nameidata_drop_rcu_last(nd)) | ||
2113 | return ERR_PTR(-ECHILD); | ||
2114 | } | ||
2115 | error = handle_reval_path(nd); | ||
2116 | if (error) | ||
2117 | goto exit; | ||
2118 | audit_inode(pathname, nd->path.dentry); | ||
2119 | if (open_flag & O_CREAT) { | ||
2120 | error = -EISDIR; | ||
2121 | goto exit; | ||
2122 | } | ||
2123 | goto ok; | ||
2296 | case LAST_BIND: | 2124 | case LAST_BIND: |
2125 | /* can't be RCU mode here */ | ||
2126 | error = handle_reval_path(nd); | ||
2127 | if (error) | ||
2128 | goto exit; | ||
2297 | audit_inode(pathname, dir); | 2129 | audit_inode(pathname, dir); |
2298 | goto ok; | 2130 | goto ok; |
2299 | } | 2131 | } |
2300 | 2132 | ||
2133 | if (!(open_flag & O_CREAT)) { | ||
2134 | int symlink_ok = 0; | ||
2135 | if (nd->last.name[nd->last.len]) | ||
2136 | nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; | ||
2137 | if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) | ||
2138 | symlink_ok = 1; | ||
2139 | /* we _can_ be in RCU mode here */ | ||
2140 | error = walk_component(nd, path, &nd->last, LAST_NORM, | ||
2141 | !symlink_ok); | ||
2142 | if (error < 0) | ||
2143 | return ERR_PTR(error); | ||
2144 | if (error) /* symlink */ | ||
2145 | return NULL; | ||
2146 | /* sayonara */ | ||
2147 | if (nd->flags & LOOKUP_RCU) { | ||
2148 | if (nameidata_drop_rcu_last(nd)) | ||
2149 | return ERR_PTR(-ECHILD); | ||
2150 | } | ||
2151 | |||
2152 | error = -ENOTDIR; | ||
2153 | if (nd->flags & LOOKUP_DIRECTORY) { | ||
2154 | if (!nd->inode->i_op->lookup) | ||
2155 | goto exit; | ||
2156 | } | ||
2157 | audit_inode(pathname, nd->path.dentry); | ||
2158 | goto ok; | ||
2159 | } | ||
2160 | |||
2161 | /* create side of things */ | ||
2162 | |||
2163 | if (nd->flags & LOOKUP_RCU) { | ||
2164 | if (nameidata_drop_rcu_last(nd)) | ||
2165 | return ERR_PTR(-ECHILD); | ||
2166 | } | ||
2167 | |||
2168 | audit_inode(pathname, dir); | ||
2169 | error = -EISDIR; | ||
2301 | /* trailing slashes? */ | 2170 | /* trailing slashes? */ |
2302 | if (nd->last.name[nd->last.len]) | 2171 | if (nd->last.name[nd->last.len]) |
2303 | goto exit; | 2172 | goto exit; |
2304 | 2173 | ||
2305 | mutex_lock(&dir->d_inode->i_mutex); | 2174 | mutex_lock(&dir->d_inode->i_mutex); |
2306 | 2175 | ||
2307 | path->dentry = lookup_hash(nd); | 2176 | dentry = lookup_hash(nd); |
2308 | path->mnt = nd->path.mnt; | 2177 | error = PTR_ERR(dentry); |
2309 | 2178 | if (IS_ERR(dentry)) { | |
2310 | error = PTR_ERR(path->dentry); | ||
2311 | if (IS_ERR(path->dentry)) { | ||
2312 | mutex_unlock(&dir->d_inode->i_mutex); | 2179 | mutex_unlock(&dir->d_inode->i_mutex); |
2313 | goto exit; | 2180 | goto exit; |
2314 | } | 2181 | } |
2315 | 2182 | ||
2316 | if (IS_ERR(nd->intent.open.file)) { | 2183 | path->dentry = dentry; |
2317 | error = PTR_ERR(nd->intent.open.file); | 2184 | path->mnt = nd->path.mnt; |
2318 | goto exit_mutex_unlock; | ||
2319 | } | ||
2320 | 2185 | ||
2321 | /* Negative dentry, just create the file */ | 2186 | /* Negative dentry, just create the file */ |
2322 | if (!path->dentry->d_inode) { | 2187 | if (!dentry->d_inode) { |
2188 | int mode = op->mode; | ||
2189 | if (!IS_POSIXACL(dir->d_inode)) | ||
2190 | mode &= ~current_umask(); | ||
2323 | /* | 2191 | /* |
2324 | * This write is needed to ensure that a | 2192 | * This write is needed to ensure that a |
2325 | * ro->rw transition does not occur between | 2193 | * rw->ro transition does not occur between |
2326 | * the time when the file is created and when | 2194 | * the time when the file is created and when |
2327 | * a permanent write count is taken through | 2195 | * a permanent write count is taken through |
2328 | * the 'struct file' in nameidata_to_filp(). | 2196 | * the 'struct file' in nameidata_to_filp(). |
@@ -2330,22 +2198,21 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2330 | error = mnt_want_write(nd->path.mnt); | 2198 | error = mnt_want_write(nd->path.mnt); |
2331 | if (error) | 2199 | if (error) |
2332 | goto exit_mutex_unlock; | 2200 | goto exit_mutex_unlock; |
2333 | error = __open_namei_create(nd, path, open_flag, mode); | 2201 | want_write = 1; |
2334 | if (error) { | 2202 | /* Don't check for write permission, don't truncate */ |
2335 | mnt_drop_write(nd->path.mnt); | 2203 | open_flag &= ~O_TRUNC; |
2336 | goto exit; | 2204 | will_truncate = 0; |
2337 | } | 2205 | acc_mode = MAY_OPEN; |
2338 | filp = nameidata_to_filp(nd); | 2206 | error = security_path_mknod(&nd->path, dentry, mode, 0); |
2339 | mnt_drop_write(nd->path.mnt); | 2207 | if (error) |
2340 | path_put(&nd->path); | 2208 | goto exit_mutex_unlock; |
2341 | if (!IS_ERR(filp)) { | 2209 | error = vfs_create(dir->d_inode, dentry, mode, nd); |
2342 | error = ima_file_check(filp, acc_mode); | 2210 | if (error) |
2343 | if (error) { | 2211 | goto exit_mutex_unlock; |
2344 | fput(filp); | 2212 | mutex_unlock(&dir->d_inode->i_mutex); |
2345 | filp = ERR_PTR(error); | 2213 | dput(nd->path.dentry); |
2346 | } | 2214 | nd->path.dentry = dentry; |
2347 | } | 2215 | goto common; |
2348 | return filp; | ||
2349 | } | 2216 | } |
2350 | 2217 | ||
2351 | /* | 2218 | /* |
@@ -2375,7 +2242,40 @@ static struct file *do_last(struct nameidata *nd, struct path *path, | |||
2375 | if (S_ISDIR(nd->inode->i_mode)) | 2242 | if (S_ISDIR(nd->inode->i_mode)) |
2376 | goto exit; | 2243 | goto exit; |
2377 | ok: | 2244 | ok: |
2378 | filp = finish_open(nd, open_flag, acc_mode); | 2245 | if (!S_ISREG(nd->inode->i_mode)) |
2246 | will_truncate = 0; | ||
2247 | |||
2248 | if (will_truncate) { | ||
2249 | error = mnt_want_write(nd->path.mnt); | ||
2250 | if (error) | ||
2251 | goto exit; | ||
2252 | want_write = 1; | ||
2253 | } | ||
2254 | common: | ||
2255 | error = may_open(&nd->path, acc_mode, open_flag); | ||
2256 | if (error) | ||
2257 | goto exit; | ||
2258 | filp = nameidata_to_filp(nd); | ||
2259 | if (!IS_ERR(filp)) { | ||
2260 | error = ima_file_check(filp, op->acc_mode); | ||
2261 | if (error) { | ||
2262 | fput(filp); | ||
2263 | filp = ERR_PTR(error); | ||
2264 | } | ||
2265 | } | ||
2266 | if (!IS_ERR(filp)) { | ||
2267 | if (will_truncate) { | ||
2268 | error = handle_truncate(filp); | ||
2269 | if (error) { | ||
2270 | fput(filp); | ||
2271 | filp = ERR_PTR(error); | ||
2272 | } | ||
2273 | } | ||
2274 | } | ||
2275 | out: | ||
2276 | if (want_write) | ||
2277 | mnt_drop_write(nd->path.mnt); | ||
2278 | path_put(&nd->path); | ||
2379 | return filp; | 2279 | return filp; |
2380 | 2280 | ||
2381 | exit_mutex_unlock: | 2281 | exit_mutex_unlock: |
@@ -2383,204 +2283,103 @@ exit_mutex_unlock: | |||
2383 | exit_dput: | 2283 | exit_dput: |
2384 | path_put_conditional(path, nd); | 2284 | path_put_conditional(path, nd); |
2385 | exit: | 2285 | exit: |
2386 | path_put(&nd->path); | 2286 | filp = ERR_PTR(error); |
2387 | return ERR_PTR(error); | 2287 | goto out; |
2388 | } | 2288 | } |
2389 | 2289 | ||
2390 | /* | 2290 | static struct file *path_openat(int dfd, const char *pathname, |
2391 | * Note that the low bits of the passed in "open_flag" | 2291 | struct nameidata *nd, const struct open_flags *op, int flags) |
2392 | * are not the same as in the local variable "flag". See | ||
2393 | * open_to_namei_flags() for more details. | ||
2394 | */ | ||
2395 | struct file *do_filp_open(int dfd, const char *pathname, | ||
2396 | int open_flag, int mode, int acc_mode) | ||
2397 | { | 2292 | { |
2293 | struct file *base = NULL; | ||
2398 | struct file *filp; | 2294 | struct file *filp; |
2399 | struct nameidata nd; | ||
2400 | int error; | ||
2401 | struct path path; | 2295 | struct path path; |
2402 | int count = 0; | 2296 | int error; |
2403 | int flag = open_to_namei_flags(open_flag); | ||
2404 | int flags; | ||
2405 | |||
2406 | if (!(open_flag & O_CREAT)) | ||
2407 | mode = 0; | ||
2408 | |||
2409 | /* Must never be set by userspace */ | ||
2410 | open_flag &= ~FMODE_NONOTIFY; | ||
2411 | |||
2412 | /* | ||
2413 | * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only | ||
2414 | * check for O_DSYNC if they need any syncing at all, we enforce it's | ||
2415 | * always set instead of having to deal with possibly weird behaviour | ||
2416 | * for malicious applications setting only __O_SYNC. | ||
2417 | */ | ||
2418 | if (open_flag & __O_SYNC) | ||
2419 | open_flag |= O_DSYNC; | ||
2420 | |||
2421 | if (!acc_mode) | ||
2422 | acc_mode = MAY_OPEN | ACC_MODE(open_flag); | ||
2423 | |||
2424 | /* O_TRUNC implies we need access checks for write permissions */ | ||
2425 | if (open_flag & O_TRUNC) | ||
2426 | acc_mode |= MAY_WRITE; | ||
2427 | |||
2428 | /* Allow the LSM permission hook to distinguish append | ||
2429 | access from general write access. */ | ||
2430 | if (open_flag & O_APPEND) | ||
2431 | acc_mode |= MAY_APPEND; | ||
2432 | |||
2433 | flags = LOOKUP_OPEN; | ||
2434 | if (open_flag & O_CREAT) { | ||
2435 | flags |= LOOKUP_CREATE; | ||
2436 | if (open_flag & O_EXCL) | ||
2437 | flags |= LOOKUP_EXCL; | ||
2438 | } | ||
2439 | if (open_flag & O_DIRECTORY) | ||
2440 | flags |= LOOKUP_DIRECTORY; | ||
2441 | if (!(open_flag & O_NOFOLLOW)) | ||
2442 | flags |= LOOKUP_FOLLOW; | ||
2443 | 2297 | ||
2444 | filp = get_empty_filp(); | 2298 | filp = get_empty_filp(); |
2445 | if (!filp) | 2299 | if (!filp) |
2446 | return ERR_PTR(-ENFILE); | 2300 | return ERR_PTR(-ENFILE); |
2447 | 2301 | ||
2448 | filp->f_flags = open_flag; | 2302 | filp->f_flags = op->open_flag; |
2449 | nd.intent.open.file = filp; | 2303 | nd->intent.open.file = filp; |
2450 | nd.intent.open.flags = flag; | 2304 | nd->intent.open.flags = open_to_namei_flags(op->open_flag); |
2451 | nd.intent.open.create_mode = mode; | 2305 | nd->intent.open.create_mode = op->mode; |
2452 | |||
2453 | if (open_flag & O_CREAT) | ||
2454 | goto creat; | ||
2455 | 2306 | ||
2456 | /* !O_CREAT, simple open */ | 2307 | error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); |
2457 | error = do_path_lookup(dfd, pathname, flags, &nd); | ||
2458 | if (unlikely(error)) | 2308 | if (unlikely(error)) |
2459 | goto out_filp2; | ||
2460 | error = -ELOOP; | ||
2461 | if (!(nd.flags & LOOKUP_FOLLOW)) { | ||
2462 | if (nd.inode->i_op->follow_link) | ||
2463 | goto out_path2; | ||
2464 | } | ||
2465 | error = -ENOTDIR; | ||
2466 | if (nd.flags & LOOKUP_DIRECTORY) { | ||
2467 | if (!nd.inode->i_op->lookup) | ||
2468 | goto out_path2; | ||
2469 | } | ||
2470 | audit_inode(pathname, nd.path.dentry); | ||
2471 | filp = finish_open(&nd, open_flag, acc_mode); | ||
2472 | out2: | ||
2473 | release_open_intent(&nd); | ||
2474 | return filp; | ||
2475 | |||
2476 | out_path2: | ||
2477 | path_put(&nd.path); | ||
2478 | out_filp2: | ||
2479 | filp = ERR_PTR(error); | ||
2480 | goto out2; | ||
2481 | |||
2482 | creat: | ||
2483 | /* OK, have to create the file. Find the parent. */ | ||
2484 | error = path_init_rcu(dfd, pathname, | ||
2485 | LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); | ||
2486 | if (error) | ||
2487 | goto out_filp; | 2309 | goto out_filp; |
2488 | error = path_walk_rcu(pathname, &nd); | ||
2489 | path_finish_rcu(&nd); | ||
2490 | if (unlikely(error == -ECHILD || error == -ESTALE)) { | ||
2491 | /* slower, locked walk */ | ||
2492 | if (error == -ESTALE) { | ||
2493 | reval: | ||
2494 | flags |= LOOKUP_REVAL; | ||
2495 | } | ||
2496 | error = path_init(dfd, pathname, | ||
2497 | LOOKUP_PARENT | (flags & LOOKUP_REVAL), &nd); | ||
2498 | if (error) | ||
2499 | goto out_filp; | ||
2500 | 2310 | ||
2501 | error = path_walk_simple(pathname, &nd); | 2311 | current->total_link_count = 0; |
2502 | } | 2312 | error = link_path_walk(pathname, nd); |
2503 | if (unlikely(error)) | 2313 | if (unlikely(error)) |
2504 | goto out_filp; | 2314 | goto out_filp; |
2505 | if (unlikely(!audit_dummy_context())) | ||
2506 | audit_inode(pathname, nd.path.dentry); | ||
2507 | 2315 | ||
2508 | /* | 2316 | filp = do_last(nd, &path, op, pathname); |
2509 | * We have the parent and last component. | ||
2510 | */ | ||
2511 | nd.flags = flags; | ||
2512 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); | ||
2513 | while (unlikely(!filp)) { /* trailing symlink */ | 2317 | while (unlikely(!filp)) { /* trailing symlink */ |
2514 | struct path link = path; | 2318 | struct path link = path; |
2515 | struct inode *linki = link.dentry->d_inode; | ||
2516 | void *cookie; | 2319 | void *cookie; |
2517 | error = -ELOOP; | 2320 | if (!(nd->flags & LOOKUP_FOLLOW)) { |
2518 | if (!(nd.flags & LOOKUP_FOLLOW)) | 2321 | path_put_conditional(&path, nd); |
2519 | goto exit_dput; | 2322 | path_put(&nd->path); |
2520 | if (count++ == 32) | 2323 | filp = ERR_PTR(-ELOOP); |
2521 | goto exit_dput; | 2324 | break; |
2522 | /* | ||
2523 | * This is subtle. Instead of calling do_follow_link() we do | ||
2524 | * the thing by hands. The reason is that this way we have zero | ||
2525 | * link_count and path_walk() (called from ->follow_link) | ||
2526 | * honoring LOOKUP_PARENT. After that we have the parent and | ||
2527 | * last component, i.e. we are in the same situation as after | ||
2528 | * the first path_walk(). Well, almost - if the last component | ||
2529 | * is normal we get its copy stored in nd->last.name and we will | ||
2530 | * have to putname() it when we are done. Procfs-like symlinks | ||
2531 | * just set LAST_BIND. | ||
2532 | */ | ||
2533 | nd.flags |= LOOKUP_PARENT; | ||
2534 | error = security_inode_follow_link(link.dentry, &nd); | ||
2535 | if (error) | ||
2536 | goto exit_dput; | ||
2537 | error = __do_follow_link(&link, &nd, &cookie); | ||
2538 | if (unlikely(error)) { | ||
2539 | if (!IS_ERR(cookie) && linki->i_op->put_link) | ||
2540 | linki->i_op->put_link(link.dentry, &nd, cookie); | ||
2541 | /* nd.path had been dropped */ | ||
2542 | nd.path = link; | ||
2543 | goto out_path; | ||
2544 | } | 2325 | } |
2545 | nd.flags &= ~LOOKUP_PARENT; | 2326 | nd->flags |= LOOKUP_PARENT; |
2546 | filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); | 2327 | nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); |
2547 | if (linki->i_op->put_link) | 2328 | error = follow_link(&link, nd, &cookie); |
2548 | linki->i_op->put_link(link.dentry, &nd, cookie); | 2329 | if (unlikely(error)) |
2549 | path_put(&link); | 2330 | filp = ERR_PTR(error); |
2331 | else | ||
2332 | filp = do_last(nd, &path, op, pathname); | ||
2333 | put_link(nd, &link, cookie); | ||
2550 | } | 2334 | } |
2551 | out: | 2335 | out: |
2552 | if (nd.root.mnt) | 2336 | if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) |
2553 | path_put(&nd.root); | 2337 | path_put(&nd->root); |
2554 | if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL)) | 2338 | if (base) |
2555 | goto reval; | 2339 | fput(base); |
2556 | release_open_intent(&nd); | 2340 | release_open_intent(nd); |
2557 | return filp; | 2341 | return filp; |
2558 | 2342 | ||
2559 | exit_dput: | ||
2560 | path_put_conditional(&path, &nd); | ||
2561 | out_path: | ||
2562 | path_put(&nd.path); | ||
2563 | out_filp: | 2343 | out_filp: |
2564 | filp = ERR_PTR(error); | 2344 | filp = ERR_PTR(error); |
2565 | goto out; | 2345 | goto out; |
2566 | } | 2346 | } |
2567 | 2347 | ||
2568 | /** | 2348 | struct file *do_filp_open(int dfd, const char *pathname, |
2569 | * filp_open - open file and return file pointer | 2349 | const struct open_flags *op, int flags) |
2570 | * | ||
2571 | * @filename: path to open | ||
2572 | * @flags: open flags as per the open(2) second argument | ||
2573 | * @mode: mode for the new file if O_CREAT is set, else ignored | ||
2574 | * | ||
2575 | * This is the helper to open a file from kernelspace if you really | ||
2576 | * have to. But in general you should not do this, so please move | ||
2577 | * along, nothing to see here.. | ||
2578 | */ | ||
2579 | struct file *filp_open(const char *filename, int flags, int mode) | ||
2580 | { | 2350 | { |
2581 | return do_filp_open(AT_FDCWD, filename, flags, mode, 0); | 2351 | struct nameidata nd; |
2352 | struct file *filp; | ||
2353 | |||
2354 | filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); | ||
2355 | if (unlikely(filp == ERR_PTR(-ECHILD))) | ||
2356 | filp = path_openat(dfd, pathname, &nd, op, flags); | ||
2357 | if (unlikely(filp == ERR_PTR(-ESTALE))) | ||
2358 | filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL); | ||
2359 | return filp; | ||
2360 | } | ||
2361 | |||
2362 | struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, | ||
2363 | const char *name, const struct open_flags *op, int flags) | ||
2364 | { | ||
2365 | struct nameidata nd; | ||
2366 | struct file *file; | ||
2367 | |||
2368 | nd.root.mnt = mnt; | ||
2369 | nd.root.dentry = dentry; | ||
2370 | |||
2371 | flags |= LOOKUP_ROOT; | ||
2372 | |||
2373 | if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN) | ||
2374 | return ERR_PTR(-ELOOP); | ||
2375 | |||
2376 | file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU); | ||
2377 | if (unlikely(file == ERR_PTR(-ECHILD))) | ||
2378 | file = path_openat(-1, name, &nd, op, flags); | ||
2379 | if (unlikely(file == ERR_PTR(-ESTALE))) | ||
2380 | file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL); | ||
2381 | return file; | ||
2582 | } | 2382 | } |
2583 | EXPORT_SYMBOL(filp_open); | ||
2584 | 2383 | ||
2585 | /** | 2384 | /** |
2586 | * lookup_create - lookup a dentry, creating it if it doesn't exist | 2385 | * lookup_create - lookup a dentry, creating it if it doesn't exist |
@@ -3119,7 +2918,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de | |||
3119 | return error; | 2918 | return error; |
3120 | 2919 | ||
3121 | mutex_lock(&inode->i_mutex); | 2920 | mutex_lock(&inode->i_mutex); |
3122 | error = dir->i_op->link(old_dentry, dir, new_dentry); | 2921 | /* Make sure we don't allow creating hardlink to an unlinked file */ |
2922 | if (inode->i_nlink == 0) | ||
2923 | error = -ENOENT; | ||
2924 | else | ||
2925 | error = dir->i_op->link(old_dentry, dir, new_dentry); | ||
3123 | mutex_unlock(&inode->i_mutex); | 2926 | mutex_unlock(&inode->i_mutex); |
3124 | if (!error) | 2927 | if (!error) |
3125 | fsnotify_link(dir, inode, new_dentry); | 2928 | fsnotify_link(dir, inode, new_dentry); |
@@ -3141,15 +2944,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, | |||
3141 | struct dentry *new_dentry; | 2944 | struct dentry *new_dentry; |
3142 | struct nameidata nd; | 2945 | struct nameidata nd; |
3143 | struct path old_path; | 2946 | struct path old_path; |
2947 | int how = 0; | ||
3144 | int error; | 2948 | int error; |
3145 | char *to; | 2949 | char *to; |
3146 | 2950 | ||
3147 | if ((flags & ~AT_SYMLINK_FOLLOW) != 0) | 2951 | if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) |
3148 | return -EINVAL; | 2952 | return -EINVAL; |
2953 | /* | ||
2954 | * To use null names we require CAP_DAC_READ_SEARCH. | ||
2955 | * This ensures that not everyone will be able to create | ||
2956 | * a hardlink using the passed file descriptor. | ||
2957 | */ | ||
2958 | if (flags & AT_EMPTY_PATH) { | ||
2959 | if (!capable(CAP_DAC_READ_SEARCH)) | ||
2960 | return -ENOENT; | ||
2961 | how = LOOKUP_EMPTY; | ||
2962 | } | ||
2963 | |||
2964 | if (flags & AT_SYMLINK_FOLLOW) | ||
2965 | how |= LOOKUP_FOLLOW; | ||
3149 | 2966 | ||
3150 | error = user_path_at(olddfd, oldname, | 2967 | error = user_path_at(olddfd, oldname, how, &old_path); |
3151 | flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0, | ||
3152 | &old_path); | ||
3153 | if (error) | 2968 | if (error) |
3154 | return error; | 2969 | return error; |
3155 | 2970 | ||
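The linkat() hunk above adds AT_EMPTY_PATH: an empty oldpath makes the link source whatever the descriptor refers to, gated by CAP_DAC_READ_SEARCH. A minimal user-space sketch of the new call pattern; the paths and target name are illustrative, the call needs that capability to succeed, and the #define covers a libc that does not know the flag yet.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#ifndef AT_EMPTY_PATH
#define AT_EMPTY_PATH 0x1000	/* new flag at the time of this patch */
#endif

int main(void)
{
	int fd = open("/tmp/source", O_RDONLY);	/* illustrative path */
	if (fd < 0)
		return 1;

	/* Empty oldpath: hardlink whatever 'fd' refers to as /tmp/newname. */
	if (linkat(fd, "", AT_FDCWD, "/tmp/newname", AT_EMPTY_PATH) < 0)
		perror("linkat");

	close(fd);
	return 0;
}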
@@ -3586,7 +3401,7 @@ EXPORT_SYMBOL(page_readlink); | |||
3586 | EXPORT_SYMBOL(__page_symlink); | 3401 | EXPORT_SYMBOL(__page_symlink); |
3587 | EXPORT_SYMBOL(page_symlink); | 3402 | EXPORT_SYMBOL(page_symlink); |
3588 | EXPORT_SYMBOL(page_symlink_inode_operations); | 3403 | EXPORT_SYMBOL(page_symlink_inode_operations); |
3589 | EXPORT_SYMBOL(path_lookup); | 3404 | EXPORT_SYMBOL(kern_path_parent); |
3590 | EXPORT_SYMBOL(kern_path); | 3405 | EXPORT_SYMBOL(kern_path); |
3591 | EXPORT_SYMBOL(vfs_path_lookup); | 3406 | EXPORT_SYMBOL(vfs_path_lookup); |
3592 | EXPORT_SYMBOL(inode_permission); | 3407 | EXPORT_SYMBOL(inode_permission); |
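One pattern worth spelling out from the fs/namei.c changes above: do_path_lookup(), do_filp_open() and do_file_open_root() all retry the walk up to three times — RCU mode first, a normal ref-walk if that returns -ECHILD, and a LOOKUP_REVAL pass if the result is -ESTALE. A schematic restatement follows; it mirrors do_path_lookup() above rather than adding any new interface.

/* Schematic of the lookup retry cascade used throughout the patch. */
static int lookup_with_fallbacks(int dfd, const char *name,
				 unsigned int flags, struct nameidata *nd)
{
	int err = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);

	if (err == -ECHILD)	/* rcu-walk could not complete locklessly */
		err = path_lookupat(dfd, name, flags, nd);
	if (err == -ESTALE)	/* stale dentry (e.g. NFS): walk again, revalidating */
		err = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
	return err;
}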
diff --git a/fs/namespace.c b/fs/namespace.c index a66feed7311..e96e03782de 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -1002,6 +1002,18 @@ const struct seq_operations mounts_op = { | |||
1002 | .show = show_vfsmnt | 1002 | .show = show_vfsmnt |
1003 | }; | 1003 | }; |
1004 | 1004 | ||
1005 | static int uuid_is_nil(u8 *uuid) | ||
1006 | { | ||
1007 | int i; | ||
1008 | u8 *cp = (u8 *)uuid; | ||
1009 | |||
1010 | for (i = 0; i < 16; i++) { | ||
1011 | if (*cp++) | ||
1012 | return 0; | ||
1013 | } | ||
1014 | return 1; | ||
1015 | } | ||
1016 | |||
1005 | static int show_mountinfo(struct seq_file *m, void *v) | 1017 | static int show_mountinfo(struct seq_file *m, void *v) |
1006 | { | 1018 | { |
1007 | struct proc_mounts *p = m->private; | 1019 | struct proc_mounts *p = m->private; |
@@ -1040,6 +1052,10 @@ static int show_mountinfo(struct seq_file *m, void *v) | |||
1040 | if (IS_MNT_UNBINDABLE(mnt)) | 1052 | if (IS_MNT_UNBINDABLE(mnt)) |
1041 | seq_puts(m, " unbindable"); | 1053 | seq_puts(m, " unbindable"); |
1042 | 1054 | ||
1055 | if (!uuid_is_nil(mnt->mnt_sb->s_uuid)) | ||
1056 | /* print the uuid */ | ||
1057 | seq_printf(m, " uuid:%pU", mnt->mnt_sb->s_uuid); | ||
1058 | |||
1043 | /* Filesystem specific data */ | 1059 | /* Filesystem specific data */ |
1044 | seq_puts(m, " - "); | 1060 | seq_puts(m, " - "); |
1045 | show_type(m, sb); | 1061 | show_type(m, sb); |
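With the fs/namespace.c hunk above, a superblock carrying a non-nil s_uuid gains a "uuid:<UUID>" tag among the optional fields of /proc/self/mountinfo, i.e. before the " - " separator. A small reader sketch, assuming the standard mountinfo layout; the buffer size is illustrative.

#include <stdio.h>
#include <string.h>

/* Pull the "uuid:" optional tag out of one /proc/self/mountinfo line.
 * Optional tags sit between the per-mount fields and the " - "
 * separator, which is where the hunk above prints the uuid. */
static const char *mountinfo_uuid(char *line)
{
	char *sep = strstr(line, " - ");
	char *tag;

	if (!sep)
		return NULL;
	*sep = '\0';			/* search only the optional-tag area */
	tag = strstr(line, " uuid:");
	if (!tag)
		return NULL;
	tag += strlen(" uuid:");
	tag[strcspn(tag, " ")] = '\0';	/* terminate at the end of the tag */
	return tag;
}

int main(void)
{
	char line[1024];
	FILE *f = fopen("/proc/self/mountinfo", "r");

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f)) {
		const char *uuid = mountinfo_uuid(line);
		if (uuid)
			printf("%s\n", uuid);
	}
	fclose(f);
	return 0;
}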
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 2f8e61816d7..01768e5e2c9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -1518,7 +1518,7 @@ static int nfsiod_start(void) | |||
1518 | { | 1518 | { |
1519 | struct workqueue_struct *wq; | 1519 | struct workqueue_struct *wq; |
1520 | dprintk("RPC: creating workqueue nfsiod\n"); | 1520 | dprintk("RPC: creating workqueue nfsiod\n"); |
1521 | wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0); | 1521 | wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0); |
1522 | if (wq == NULL) | 1522 | if (wq == NULL) |
1523 | return -ENOMEM; | 1523 | return -ENOMEM; |
1524 | nfsiod_workqueue = wq; | 1524 | nfsiod_workqueue = wq; |
diff --git a/fs/nfsctl.c b/fs/nfsctl.c index bf9cbd242dd..124e8fcb0dd 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c | |||
@@ -22,30 +22,17 @@ | |||
22 | 22 | ||
23 | static struct file *do_open(char *name, int flags) | 23 | static struct file *do_open(char *name, int flags) |
24 | { | 24 | { |
25 | struct nameidata nd; | ||
26 | struct vfsmount *mnt; | 25 | struct vfsmount *mnt; |
27 | int error; | 26 | struct file *file; |
28 | 27 | ||
29 | mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); | 28 | mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); |
30 | if (IS_ERR(mnt)) | 29 | if (IS_ERR(mnt)) |
31 | return (struct file *)mnt; | 30 | return (struct file *)mnt; |
32 | 31 | ||
33 | error = vfs_path_lookup(mnt->mnt_root, mnt, name, 0, &nd); | 32 | file = file_open_root(mnt->mnt_root, mnt, name, flags); |
34 | mntput(mnt); /* drop do_kern_mount reference */ | ||
35 | if (error) | ||
36 | return ERR_PTR(error); | ||
37 | |||
38 | if (flags == O_RDWR) | ||
39 | error = may_open(&nd.path, MAY_READ|MAY_WRITE, flags); | ||
40 | else | ||
41 | error = may_open(&nd.path, MAY_WRITE, flags); | ||
42 | 33 | ||
43 | if (!error) | 34 | mntput(mnt); /* drop do_kern_mount reference */ |
44 | return dentry_open(nd.path.dentry, nd.path.mnt, flags, | 35 | return file; |
45 | current_cred()); | ||
46 | |||
47 | path_put(&nd.path); | ||
48 | return ERR_PTR(error); | ||
49 | } | 36 | } |
50 | 37 | ||
51 | static struct { | 38 | static struct { |
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 5dbc3062b4f..254652a9b54 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c | |||
@@ -197,8 +197,12 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, | |||
197 | dentry->d_name.len, dentry->d_name.name, | 197 | dentry->d_name.len, dentry->d_name.name, |
198 | fh, len, connectable); | 198 | fh, len, connectable); |
199 | 199 | ||
200 | if (len < 3 || (connectable && len < 6)) { | 200 | if (connectable && (len < 6)) { |
201 | mlog(ML_ERROR, "fh buffer is too small for encoding\n"); | 201 | *max_len = 6; |
202 | type = 255; | ||
203 | goto bail; | ||
204 | } else if (len < 3) { | ||
205 | *max_len = 3; | ||
202 | type = 255; | 206 | type = 255; |
203 | goto bail; | 207 | goto bail; |
204 | } | 208 | } |
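The ocfs2_encode_fh() hunk above switches from merely logging an error to the usual exportfs convention: when the caller's buffer is too small, report the required length back through *max_len and return 255 ("cannot encode"). A minimal sketch of that contract; example_fill_fid() and the returned fid-type values are hypothetical.

static int example_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
			     int connectable)
{
	int needed = connectable ? 6 : 3;	/* u32 words, as in the ocfs2 hunk */

	if (*max_len < needed) {
		*max_len = needed;		/* tell the caller how much is needed */
		return 255;			/* "cannot encode" */
	}
	*max_len = needed;
	example_fill_fid(dentry, fh, connectable);	/* hypothetical helper */
	return connectable ? 2 : 1;		/* illustrative fid types */
}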
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index 196fcb52d95..d5ab56cbe5c 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h | |||
@@ -114,7 +114,4 @@ int ocfs2_local_write_dquot(struct dquot *dquot); | |||
114 | extern const struct dquot_operations ocfs2_quota_operations; | 114 | extern const struct dquot_operations ocfs2_quota_operations; |
115 | extern struct quota_format_type ocfs2_quota_format; | 115 | extern struct quota_format_type ocfs2_quota_format; |
116 | 116 | ||
117 | int ocfs2_quota_setup(void); | ||
118 | void ocfs2_quota_shutdown(void); | ||
119 | |||
120 | #endif /* _OCFS2_QUOTA_H */ | 117 | #endif /* _OCFS2_QUOTA_H */ |
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 4607923eb24..a73f6416648 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -63,8 +63,6 @@ | |||
63 | * write to gf | 63 | * write to gf |
64 | */ | 64 | */ |
65 | 65 | ||
66 | static struct workqueue_struct *ocfs2_quota_wq = NULL; | ||
67 | |||
68 | static void qsync_work_fn(struct work_struct *work); | 66 | static void qsync_work_fn(struct work_struct *work); |
69 | 67 | ||
70 | static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp) | 68 | static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp) |
@@ -400,8 +398,8 @@ int ocfs2_global_read_info(struct super_block *sb, int type) | |||
400 | OCFS2_QBLK_RESERVED_SPACE; | 398 | OCFS2_QBLK_RESERVED_SPACE; |
401 | oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); | 399 | oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); |
402 | INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); | 400 | INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); |
403 | queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, | 401 | schedule_delayed_work(&oinfo->dqi_sync_work, |
404 | msecs_to_jiffies(oinfo->dqi_syncms)); | 402 | msecs_to_jiffies(oinfo->dqi_syncms)); |
405 | 403 | ||
406 | out_err: | 404 | out_err: |
407 | mlog_exit(status); | 405 | mlog_exit(status); |
@@ -635,8 +633,8 @@ static void qsync_work_fn(struct work_struct *work) | |||
635 | struct super_block *sb = oinfo->dqi_gqinode->i_sb; | 633 | struct super_block *sb = oinfo->dqi_gqinode->i_sb; |
636 | 634 | ||
637 | dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); | 635 | dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); |
638 | queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, | 636 | schedule_delayed_work(&oinfo->dqi_sync_work, |
639 | msecs_to_jiffies(oinfo->dqi_syncms)); | 637 | msecs_to_jiffies(oinfo->dqi_syncms)); |
640 | } | 638 | } |
641 | 639 | ||
642 | /* | 640 | /* |
@@ -923,20 +921,3 @@ const struct dquot_operations ocfs2_quota_operations = { | |||
923 | .alloc_dquot = ocfs2_alloc_dquot, | 921 | .alloc_dquot = ocfs2_alloc_dquot, |
924 | .destroy_dquot = ocfs2_destroy_dquot, | 922 | .destroy_dquot = ocfs2_destroy_dquot, |
925 | }; | 923 | }; |
926 | |||
927 | int ocfs2_quota_setup(void) | ||
928 | { | ||
929 | ocfs2_quota_wq = create_workqueue("o2quot"); | ||
930 | if (!ocfs2_quota_wq) | ||
931 | return -ENOMEM; | ||
932 | return 0; | ||
933 | } | ||
934 | |||
935 | void ocfs2_quota_shutdown(void) | ||
936 | { | ||
937 | if (ocfs2_quota_wq) { | ||
938 | flush_workqueue(ocfs2_quota_wq); | ||
939 | destroy_workqueue(ocfs2_quota_wq); | ||
940 | ocfs2_quota_wq = NULL; | ||
941 | } | ||
942 | } | ||
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 35798b88042..c384d634872 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -4380,7 +4380,7 @@ static int ocfs2_user_path_parent(const char __user *path, | |||
4380 | if (IS_ERR(s)) | 4380 | if (IS_ERR(s)) |
4381 | return PTR_ERR(s); | 4381 | return PTR_ERR(s); |
4382 | 4382 | ||
4383 | error = path_lookup(s, LOOKUP_PARENT, nd); | 4383 | error = kern_path_parent(s, nd); |
4384 | if (error) | 4384 | if (error) |
4385 | putname(s); | 4385 | putname(s); |
4386 | else | 4386 | else |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 36c423fb063..236ed1bdca2 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1657,16 +1657,11 @@ static int __init ocfs2_init(void) | |||
1657 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); | 1657 | mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n"); |
1658 | } | 1658 | } |
1659 | 1659 | ||
1660 | status = ocfs2_quota_setup(); | ||
1661 | if (status) | ||
1662 | goto leave; | ||
1663 | |||
1664 | ocfs2_set_locking_protocol(); | 1660 | ocfs2_set_locking_protocol(); |
1665 | 1661 | ||
1666 | status = register_quota_format(&ocfs2_quota_format); | 1662 | status = register_quota_format(&ocfs2_quota_format); |
1667 | leave: | 1663 | leave: |
1668 | if (status < 0) { | 1664 | if (status < 0) { |
1669 | ocfs2_quota_shutdown(); | ||
1670 | ocfs2_free_mem_caches(); | 1665 | ocfs2_free_mem_caches(); |
1671 | exit_ocfs2_uptodate_cache(); | 1666 | exit_ocfs2_uptodate_cache(); |
1672 | } | 1667 | } |
@@ -1683,8 +1678,6 @@ static void __exit ocfs2_exit(void) | |||
1683 | { | 1678 | { |
1684 | mlog_entry_void(); | 1679 | mlog_entry_void(); |
1685 | 1680 | ||
1686 | ocfs2_quota_shutdown(); | ||
1687 | |||
1688 | if (ocfs2_wq) { | 1681 | if (ocfs2_wq) { |
1689 | flush_workqueue(ocfs2_wq); | 1682 | flush_workqueue(ocfs2_wq); |
1690 | destroy_workqueue(ocfs2_wq); | 1683 | destroy_workqueue(ocfs2_wq); |
@@ -573,13 +573,15 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, | |||
573 | { | 573 | { |
574 | struct path path; | 574 | struct path path; |
575 | int error = -EINVAL; | 575 | int error = -EINVAL; |
576 | int follow; | 576 | int lookup_flags; |
577 | 577 | ||
578 | if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0) | 578 | if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) |
579 | goto out; | 579 | goto out; |
580 | 580 | ||
581 | follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; | 581 | lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; |
582 | error = user_path_at(dfd, filename, follow, &path); | 582 | if (flag & AT_EMPTY_PATH) |
583 | lookup_flags |= LOOKUP_EMPTY; | ||
584 | error = user_path_at(dfd, filename, lookup_flags, &path); | ||
583 | if (error) | 585 | if (error) |
584 | goto out; | 586 | goto out; |
585 | error = mnt_want_write(path.mnt); | 587 | error = mnt_want_write(path.mnt); |
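The fchownat() hunk above accepts AT_EMPTY_PATH as well, so an empty filename makes the call act on whatever the descriptor refers to, much like fchown() on it. A user-space sketch; the path and ids are illustrative (changing the owner still needs the usual chown privileges), and the #define covers a libc that does not define the flag yet.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#ifndef AT_EMPTY_PATH
#define AT_EMPTY_PATH 0x1000	/* new flag at the time of this patch */
#endif

int main(void)
{
	int fd = open("/tmp/somefile", O_RDONLY);	/* illustrative path */
	if (fd < 0)
		return 1;

	/* Empty path + AT_EMPTY_PATH: chown the object 'fd' refers to. */
	if (fchownat(fd, "", 1000, 1000, AT_EMPTY_PATH) < 0)
		perror("fchownat");

	close(fd);
	return 0;
}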
@@ -669,11 +671,16 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | |||
669 | int (*open)(struct inode *, struct file *), | 671 | int (*open)(struct inode *, struct file *), |
670 | const struct cred *cred) | 672 | const struct cred *cred) |
671 | { | 673 | { |
674 | static const struct file_operations empty_fops = {}; | ||
672 | struct inode *inode; | 675 | struct inode *inode; |
673 | int error; | 676 | int error; |
674 | 677 | ||
675 | f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | | 678 | f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK | |
676 | FMODE_PREAD | FMODE_PWRITE; | 679 | FMODE_PREAD | FMODE_PWRITE; |
680 | |||
681 | if (unlikely(f->f_flags & O_PATH)) | ||
682 | f->f_mode = FMODE_PATH; | ||
683 | |||
677 | inode = dentry->d_inode; | 684 | inode = dentry->d_inode; |
678 | if (f->f_mode & FMODE_WRITE) { | 685 | if (f->f_mode & FMODE_WRITE) { |
679 | error = __get_file_write_access(inode, mnt); | 686 | error = __get_file_write_access(inode, mnt); |
@@ -687,9 +694,15 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, | |||
687 | f->f_path.dentry = dentry; | 694 | f->f_path.dentry = dentry; |
688 | f->f_path.mnt = mnt; | 695 | f->f_path.mnt = mnt; |
689 | f->f_pos = 0; | 696 | f->f_pos = 0; |
690 | f->f_op = fops_get(inode->i_fop); | ||
691 | file_sb_list_add(f, inode->i_sb); | 697 | file_sb_list_add(f, inode->i_sb); |
692 | 698 | ||
699 | if (unlikely(f->f_mode & FMODE_PATH)) { | ||
700 | f->f_op = &empty_fops; | ||
701 | return f; | ||
702 | } | ||
703 | |||
704 | f->f_op = fops_get(inode->i_fop); | ||
705 | |||
693 | error = security_dentry_open(f, cred); | 706 | error = security_dentry_open(f, cred); |
694 | if (error) | 707 | if (error) |
695 | goto cleanup_all; | 708 | goto cleanup_all; |
@@ -891,15 +904,110 @@ void fd_install(unsigned int fd, struct file *file) | |||
891 | 904 | ||
892 | EXPORT_SYMBOL(fd_install); | 905 | EXPORT_SYMBOL(fd_install); |
893 | 906 | ||
907 | static inline int build_open_flags(int flags, int mode, struct open_flags *op) | ||
908 | { | ||
909 | int lookup_flags = 0; | ||
910 | int acc_mode; | ||
911 | |||
912 | if (!(flags & O_CREAT)) | ||
913 | mode = 0; | ||
914 | op->mode = mode; | ||
915 | |||
916 | /* Must never be set by userspace */ | ||
917 | flags &= ~FMODE_NONOTIFY; | ||
918 | |||
919 | /* | ||
920 | * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only | ||
921 | * check for O_DSYNC if they need any syncing at all, we enforce that it | ||
922 | * is always set instead of having to deal with possibly weird behaviour | ||
923 | * for malicious applications setting only __O_SYNC. | ||
924 | */ | ||
925 | if (flags & __O_SYNC) | ||
926 | flags |= O_DSYNC; | ||
927 | |||
928 | /* | ||
929 | * If we have O_PATH in the open flags, then we | ||
930 | * cannot have anything other than the below set of flags. | ||
931 | */ | ||
932 | if (flags & O_PATH) { | ||
933 | flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH; | ||
934 | acc_mode = 0; | ||
935 | } else { | ||
936 | acc_mode = MAY_OPEN | ACC_MODE(flags); | ||
937 | } | ||
938 | |||
939 | op->open_flag = flags; | ||
940 | |||
941 | /* O_TRUNC implies we need access checks for write permissions */ | ||
942 | if (flags & O_TRUNC) | ||
943 | acc_mode |= MAY_WRITE; | ||
944 | |||
945 | /* Allow the LSM permission hook to distinguish append | ||
946 | access from general write access. */ | ||
947 | if (flags & O_APPEND) | ||
948 | acc_mode |= MAY_APPEND; | ||
949 | |||
950 | op->acc_mode = acc_mode; | ||
951 | |||
952 | op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN; | ||
953 | |||
954 | if (flags & O_CREAT) { | ||
955 | op->intent |= LOOKUP_CREATE; | ||
956 | if (flags & O_EXCL) | ||
957 | op->intent |= LOOKUP_EXCL; | ||
958 | } | ||
959 | |||
960 | if (flags & O_DIRECTORY) | ||
961 | lookup_flags |= LOOKUP_DIRECTORY; | ||
962 | if (!(flags & O_NOFOLLOW)) | ||
963 | lookup_flags |= LOOKUP_FOLLOW; | ||
964 | return lookup_flags; | ||
965 | } | ||
966 | |||
967 | /** | ||
968 | * filp_open - open file and return file pointer | ||
969 | * | ||
970 | * @filename: path to open | ||
971 | * @flags: open flags as per the open(2) second argument | ||
972 | * @mode: mode for the new file if O_CREAT is set, else ignored | ||
973 | * | ||
974 | * This is the helper to open a file from kernelspace if you really | ||
975 | * have to. But in general you should not do this, so please move | ||
976 | * along, nothing to see here. | ||
977 | */ | ||
978 | struct file *filp_open(const char *filename, int flags, int mode) | ||
979 | { | ||
980 | struct open_flags op; | ||
981 | int lookup = build_open_flags(flags, mode, &op); | ||
982 | return do_filp_open(AT_FDCWD, filename, &op, lookup); | ||
983 | } | ||
984 | EXPORT_SYMBOL(filp_open); | ||
985 | |||
986 | struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt, | ||
987 | const char *filename, int flags) | ||
988 | { | ||
989 | struct open_flags op; | ||
990 | int lookup = build_open_flags(flags, 0, &op); | ||
991 | if (flags & O_CREAT) | ||
992 | return ERR_PTR(-EINVAL); | ||
993 | if (!filename && (flags & O_DIRECTORY)) | ||
994 | if (!dentry->d_inode->i_op->lookup) | ||
995 | return ERR_PTR(-ENOTDIR); | ||
996 | return do_file_open_root(dentry, mnt, filename, &op, lookup); | ||
997 | } | ||
998 | EXPORT_SYMBOL(file_open_root); | ||
999 | |||
894 | long do_sys_open(int dfd, const char __user *filename, int flags, int mode) | 1000 | long do_sys_open(int dfd, const char __user *filename, int flags, int mode) |
895 | { | 1001 | { |
1002 | struct open_flags op; | ||
1003 | int lookup = build_open_flags(flags, mode, &op); | ||
896 | char *tmp = getname(filename); | 1004 | char *tmp = getname(filename); |
897 | int fd = PTR_ERR(tmp); | 1005 | int fd = PTR_ERR(tmp); |
898 | 1006 | ||
899 | if (!IS_ERR(tmp)) { | 1007 | if (!IS_ERR(tmp)) { |
900 | fd = get_unused_fd_flags(flags); | 1008 | fd = get_unused_fd_flags(flags); |
901 | if (fd >= 0) { | 1009 | if (fd >= 0) { |
902 | struct file *f = do_filp_open(dfd, tmp, flags, mode, 0); | 1010 | struct file *f = do_filp_open(dfd, tmp, &op, lookup); |
903 | if (IS_ERR(f)) { | 1011 | if (IS_ERR(f)) { |
904 | put_unused_fd(fd); | 1012 | put_unused_fd(fd); |
905 | fd = PTR_ERR(f); | 1013 | fd = PTR_ERR(f); |
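The filp_open() kerneldoc above is explicit that this is an in-kernel helper of last resort. A minimal sketch of how kernel code would use it together with filp_close() (hypothetical caller and path, shown only to illustrate the API):

	#include <linux/fs.h>
	#include <linux/err.h>

	static int example_open_config(void)
	{
		struct file *filp;

		/* Hypothetical path; note that file_open_root() rejects O_CREAT. */
		filp = filp_open("/etc/example.conf", O_RDONLY, 0);
		if (IS_ERR(filp))
			return PTR_ERR(filp);

		/* ... read from filp with the usual in-kernel read helpers ... */

		filp_close(filp, NULL);
		return 0;
	}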
@@ -969,8 +1077,10 @@ int filp_close(struct file *filp, fl_owner_t id) | |||
969 | if (filp->f_op && filp->f_op->flush) | 1077 | if (filp->f_op && filp->f_op->flush) |
970 | retval = filp->f_op->flush(filp, id); | 1078 | retval = filp->f_op->flush(filp, id); |
971 | 1079 | ||
972 | dnotify_flush(filp, id); | 1080 | if (likely(!(filp->f_mode & FMODE_PATH))) { |
973 | locks_remove_posix(filp, id); | 1081 | dnotify_flush(filp, id); |
1082 | locks_remove_posix(filp, id); | ||
1083 | } | ||
974 | fput(filp); | 1084 | fput(filp); |
975 | return retval; | 1085 | return retval; |
976 | } | 1086 | } |
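Taken together, the open.c changes above introduce O_PATH descriptors: build_open_flags() masks everything except O_DIRECTORY, O_NOFOLLOW and O_PATH itself, __dentry_open() marks the file FMODE_PATH with an empty file_operations, and filp_close() skips dnotify and POSIX lock cleanup for them. A rough userspace sketch of the intended use (illustrative paths only):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		/* Grab a pure location reference: no read permission required. */
		int dirfd = open("/etc", O_PATH | O_DIRECTORY);
		if (dirfd < 0) {
			perror("open");
			return 1;
		}
		/* The O_PATH fd can anchor *at() lookups... */
		int fd = openat(dirfd, "hostname", O_RDONLY);
		if (fd < 0)
			perror("openat");
		/* ...but read(dirfd, ...) and the like would fail: it has no f_op methods. */
		if (fd >= 0)
			close(fd);
		close(dirfd);
		return 0;
	}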
diff --git a/fs/partitions/osf.c b/fs/partitions/osf.c index be03a0b08b4..764b86a0196 100644 --- a/fs/partitions/osf.c +++ b/fs/partitions/osf.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include "check.h" | 10 | #include "check.h" |
11 | #include "osf.h" | 11 | #include "osf.h" |
12 | 12 | ||
13 | #define MAX_OSF_PARTITIONS 8 | 13 | #define MAX_OSF_PARTITIONS 18 |
14 | 14 | ||
15 | int osf_partition(struct parsed_partitions *state) | 15 | int osf_partition(struct parsed_partitions *state) |
16 | { | 16 | { |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 0bae036831e..1bba24bad82 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -1593,8 +1593,13 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, | |||
1593 | struct inode *inode = dentry->d_inode; | 1593 | struct inode *inode = dentry->d_inode; |
1594 | int maxlen = *lenp; | 1594 | int maxlen = *lenp; |
1595 | 1595 | ||
1596 | if (maxlen < 3) | 1596 | if (need_parent && (maxlen < 5)) { |
1597 | *lenp = 5; | ||
1597 | return 255; | 1598 | return 255; |
1599 | } else if (maxlen < 3) { | ||
1600 | *lenp = 3; | ||
1601 | return 255; | ||
1602 | } | ||
1598 | 1603 | ||
1599 | data[0] = inode->i_ino; | 1604 | data[0] = inode->i_ino; |
1600 | data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); | 1605 | data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); |
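The reiserfs_encode_fh() fix above (and the matching udf and xfs hunks later in this diff) follows the exportfs convention for an undersized file-handle buffer: store the number of 32-bit words actually required in *lenp and return 255 so the caller knows how much space to provide. The general shape, as a standalone sketch (identifiers and fileid values are illustrative, not reiserfs's):

	#include <linux/types.h>

	static int example_encode_fh(__u32 *data, int *lenp, int need_parent)
	{
		int required = need_parent ? 5 : 3;	/* 32-bit words needed */

		if (*lenp < required) {
			*lenp = required;	/* tell the caller what would have fit */
			return 255;		/* "buffer too small" */
		}
		/* ... fill data[0..required-1] from the inode (and parent) ... */
		*lenp = required;
		return need_parent ? 2 : 1;	/* fileid type, illustrative values */
	}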
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 3eea859e699..c77514bd577 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -2876,7 +2876,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name, | |||
2876 | reiserfs_mounted_fs_count++; | 2876 | reiserfs_mounted_fs_count++; |
2877 | if (reiserfs_mounted_fs_count <= 1) { | 2877 | if (reiserfs_mounted_fs_count <= 1) { |
2878 | reiserfs_write_unlock(sb); | 2878 | reiserfs_write_unlock(sb); |
2879 | commit_wq = create_workqueue("reiserfs"); | 2879 | commit_wq = alloc_workqueue("reiserfs", WQ_MEM_RECLAIM, 0); |
2880 | reiserfs_write_lock(sb); | 2880 | reiserfs_write_lock(sb); |
2881 | } | 2881 | } |
2882 | 2882 | ||
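This reiserfs hunk, like the xfs_buf.c and xfs_mru_cache.c hunks further down, converts create_workqueue()/create_singlethread_workqueue() to alloc_workqueue(), keeping WQ_MEM_RECLAIM so the queue has a rescuer thread and can make forward progress under memory pressure. The general pattern (sketch with placeholder names):

	#include <linux/errno.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *example_wq;

	static int example_wq_init(void)
	{
		/* max_active = 1 mimics the old single-threaded queues;
		 * 0 would request the default concurrency instead. */
		example_wq = alloc_workqueue("example", WQ_MEM_RECLAIM, 1);
		return example_wq ? 0 : -ENOMEM;
	}

	static void example_wq_exit(void)
	{
		flush_workqueue(example_wq);
		destroy_workqueue(example_wq);
	}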
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 82f45542dcf..118662690cd 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c | |||
@@ -1123,10 +1123,6 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, | |||
1123 | reiserfs_write_unlock(dir->i_sb); | 1123 | reiserfs_write_unlock(dir->i_sb); |
1124 | return -EMLINK; | 1124 | return -EMLINK; |
1125 | } | 1125 | } |
1126 | if (inode->i_nlink == 0) { | ||
1127 | reiserfs_write_unlock(dir->i_sb); | ||
1128 | return -ENOENT; | ||
1129 | } | ||
1130 | 1126 | ||
1131 | /* inc before scheduling so reiserfs_unlink knows we are here */ | 1127 | /* inc before scheduling so reiserfs_unlink knows we are here */ |
1132 | inc_nlink(inode); | 1128 | inc_nlink(inode); |
diff --git a/fs/stat.c b/fs/stat.c --- a/fs/stat.c +++ b/fs/stat.c | |||
@@ -75,13 +75,16 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, | |||
75 | int error = -EINVAL; | 75 | int error = -EINVAL; |
76 | int lookup_flags = 0; | 76 | int lookup_flags = 0; |
77 | 77 | ||
78 | if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)) != 0) | 78 | if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | |
79 | AT_EMPTY_PATH)) != 0) | ||
79 | goto out; | 80 | goto out; |
80 | 81 | ||
81 | if (!(flag & AT_SYMLINK_NOFOLLOW)) | 82 | if (!(flag & AT_SYMLINK_NOFOLLOW)) |
82 | lookup_flags |= LOOKUP_FOLLOW; | 83 | lookup_flags |= LOOKUP_FOLLOW; |
83 | if (flag & AT_NO_AUTOMOUNT) | 84 | if (flag & AT_NO_AUTOMOUNT) |
84 | lookup_flags |= LOOKUP_NO_AUTOMOUNT; | 85 | lookup_flags |= LOOKUP_NO_AUTOMOUNT; |
86 | if (flag & AT_EMPTY_PATH) | ||
87 | lookup_flags |= LOOKUP_EMPTY; | ||
85 | 88 | ||
86 | error = user_path_at(dfd, filename, lookup_flags, &path); | 89 | error = user_path_at(dfd, filename, lookup_flags, &path); |
87 | if (error) | 90 | if (error) |
@@ -297,7 +300,7 @@ SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, | |||
297 | if (bufsiz <= 0) | 300 | if (bufsiz <= 0) |
298 | return -EINVAL; | 301 | return -EINVAL; |
299 | 302 | ||
300 | error = user_path_at(dfd, pathname, 0, &path); | 303 | error = user_path_at(dfd, pathname, LOOKUP_EMPTY, &path); |
301 | if (!error) { | 304 | if (!error) { |
302 | struct inode *inode = path.dentry->d_inode; | 305 | struct inode *inode = path.dentry->d_inode; |
303 | 306 | ||
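With LOOKUP_EMPTY wired into vfs_fstatat() (via AT_EMPTY_PATH) and into readlinkat() unconditionally, an O_PATH descriptor, including one taken on a symlink with O_NOFOLLOW, can be inspected directly. Userspace sketch (the path is a placeholder):

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/stat.h>
	#include <unistd.h>

	int main(void)
	{
		char target[256];
		struct stat st;
		/* Hold the symlink itself, not what it points to. */
		int fd = open("/tmp/some-symlink", O_PATH | O_NOFOLLOW);
		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (fstatat(fd, "", &st, AT_EMPTY_PATH) == 0)
			printf("mode %o\n", st.st_mode & 07777);
		/* readlinkat() with an empty path now resolves the fd itself. */
		ssize_t n = readlinkat(fd, "", target, sizeof(target) - 1);
		if (n >= 0) {
			target[n] = '\0';
			printf("-> %s\n", target);
		}
		close(fd);
		return 0;
	}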
diff --git a/fs/statfs.c b/fs/statfs.c index 30ea8c8a996..8244924dec5 100644 --- a/fs/statfs.c +++ b/fs/statfs.c | |||
@@ -73,149 +73,135 @@ int vfs_statfs(struct path *path, struct kstatfs *buf) | |||
73 | } | 73 | } |
74 | EXPORT_SYMBOL(vfs_statfs); | 74 | EXPORT_SYMBOL(vfs_statfs); |
75 | 75 | ||
76 | static int do_statfs_native(struct path *path, struct statfs *buf) | 76 | int user_statfs(const char __user *pathname, struct kstatfs *st) |
77 | { | 77 | { |
78 | struct kstatfs st; | 78 | struct path path; |
79 | int retval; | 79 | int error = user_path(pathname, &path); |
80 | if (!error) { | ||
81 | error = vfs_statfs(&path, st); | ||
82 | path_put(&path); | ||
83 | } | ||
84 | return error; | ||
85 | } | ||
80 | 86 | ||
81 | retval = vfs_statfs(path, &st); | 87 | int fd_statfs(int fd, struct kstatfs *st) |
82 | if (retval) | 88 | { |
83 | return retval; | 89 | struct file *file = fget(fd); |
90 | int error = -EBADF; | ||
91 | if (file) { | ||
92 | error = vfs_statfs(&file->f_path, st); | ||
93 | fput(file); | ||
94 | } | ||
95 | return error; | ||
96 | } | ||
84 | 97 | ||
85 | if (sizeof(*buf) == sizeof(st)) | 98 | static int do_statfs_native(struct kstatfs *st, struct statfs __user *p) |
86 | memcpy(buf, &st, sizeof(st)); | 99 | { |
100 | struct statfs buf; | ||
101 | |||
102 | if (sizeof(buf) == sizeof(*st)) | ||
103 | memcpy(&buf, st, sizeof(*st)); | ||
87 | else { | 104 | else { |
88 | if (sizeof buf->f_blocks == 4) { | 105 | if (sizeof buf.f_blocks == 4) { |
89 | if ((st.f_blocks | st.f_bfree | st.f_bavail | | 106 | if ((st->f_blocks | st->f_bfree | st->f_bavail | |
90 | st.f_bsize | st.f_frsize) & | 107 | st->f_bsize | st->f_frsize) & |
91 | 0xffffffff00000000ULL) | 108 | 0xffffffff00000000ULL) |
92 | return -EOVERFLOW; | 109 | return -EOVERFLOW; |
93 | /* | 110 | /* |
94 | * f_files and f_ffree may be -1; it's okay to stuff | 111 | * f_files and f_ffree may be -1; it's okay to stuff |
95 | * that into 32 bits | 112 | * that into 32 bits |
96 | */ | 113 | */ |
97 | if (st.f_files != -1 && | 114 | if (st->f_files != -1 && |
98 | (st.f_files & 0xffffffff00000000ULL)) | 115 | (st->f_files & 0xffffffff00000000ULL)) |
99 | return -EOVERFLOW; | 116 | return -EOVERFLOW; |
100 | if (st.f_ffree != -1 && | 117 | if (st->f_ffree != -1 && |
101 | (st.f_ffree & 0xffffffff00000000ULL)) | 118 | (st->f_ffree & 0xffffffff00000000ULL)) |
102 | return -EOVERFLOW; | 119 | return -EOVERFLOW; |
103 | } | 120 | } |
104 | 121 | ||
105 | buf->f_type = st.f_type; | 122 | buf.f_type = st->f_type; |
106 | buf->f_bsize = st.f_bsize; | 123 | buf.f_bsize = st->f_bsize; |
107 | buf->f_blocks = st.f_blocks; | 124 | buf.f_blocks = st->f_blocks; |
108 | buf->f_bfree = st.f_bfree; | 125 | buf.f_bfree = st->f_bfree; |
109 | buf->f_bavail = st.f_bavail; | 126 | buf.f_bavail = st->f_bavail; |
110 | buf->f_files = st.f_files; | 127 | buf.f_files = st->f_files; |
111 | buf->f_ffree = st.f_ffree; | 128 | buf.f_ffree = st->f_ffree; |
112 | buf->f_fsid = st.f_fsid; | 129 | buf.f_fsid = st->f_fsid; |
113 | buf->f_namelen = st.f_namelen; | 130 | buf.f_namelen = st->f_namelen; |
114 | buf->f_frsize = st.f_frsize; | 131 | buf.f_frsize = st->f_frsize; |
115 | buf->f_flags = st.f_flags; | 132 | buf.f_flags = st->f_flags; |
116 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | 133 | memset(buf.f_spare, 0, sizeof(buf.f_spare)); |
117 | } | 134 | } |
135 | if (copy_to_user(p, &buf, sizeof(buf))) | ||
136 | return -EFAULT; | ||
118 | return 0; | 137 | return 0; |
119 | } | 138 | } |
120 | 139 | ||
121 | static int do_statfs64(struct path *path, struct statfs64 *buf) | 140 | static int do_statfs64(struct kstatfs *st, struct statfs64 __user *p) |
122 | { | 141 | { |
123 | struct kstatfs st; | 142 | struct statfs64 buf; |
124 | int retval; | 143 | if (sizeof(buf) == sizeof(*st)) |
125 | 144 | memcpy(&buf, st, sizeof(*st)); | |
126 | retval = vfs_statfs(path, &st); | ||
127 | if (retval) | ||
128 | return retval; | ||
129 | |||
130 | if (sizeof(*buf) == sizeof(st)) | ||
131 | memcpy(buf, &st, sizeof(st)); | ||
132 | else { | 145 | else { |
133 | buf->f_type = st.f_type; | 146 | buf.f_type = st->f_type; |
134 | buf->f_bsize = st.f_bsize; | 147 | buf.f_bsize = st->f_bsize; |
135 | buf->f_blocks = st.f_blocks; | 148 | buf.f_blocks = st->f_blocks; |
136 | buf->f_bfree = st.f_bfree; | 149 | buf.f_bfree = st->f_bfree; |
137 | buf->f_bavail = st.f_bavail; | 150 | buf.f_bavail = st->f_bavail; |
138 | buf->f_files = st.f_files; | 151 | buf.f_files = st->f_files; |
139 | buf->f_ffree = st.f_ffree; | 152 | buf.f_ffree = st->f_ffree; |
140 | buf->f_fsid = st.f_fsid; | 153 | buf.f_fsid = st->f_fsid; |
141 | buf->f_namelen = st.f_namelen; | 154 | buf.f_namelen = st->f_namelen; |
142 | buf->f_frsize = st.f_frsize; | 155 | buf.f_frsize = st->f_frsize; |
143 | buf->f_flags = st.f_flags; | 156 | buf.f_flags = st->f_flags; |
144 | memset(buf->f_spare, 0, sizeof(buf->f_spare)); | 157 | memset(buf.f_spare, 0, sizeof(buf.f_spare)); |
145 | } | 158 | } |
159 | if (copy_to_user(p, &buf, sizeof(buf))) | ||
160 | return -EFAULT; | ||
146 | return 0; | 161 | return 0; |
147 | } | 162 | } |
148 | 163 | ||
149 | SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) | 164 | SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) |
150 | { | 165 | { |
151 | struct path path; | 166 | struct kstatfs st; |
152 | int error; | 167 | int error = user_statfs(pathname, &st); |
153 | 168 | if (!error) | |
154 | error = user_path(pathname, &path); | 169 | error = do_statfs_native(&st, buf); |
155 | if (!error) { | ||
156 | struct statfs tmp; | ||
157 | error = do_statfs_native(&path, &tmp); | ||
158 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
159 | error = -EFAULT; | ||
160 | path_put(&path); | ||
161 | } | ||
162 | return error; | 170 | return error; |
163 | } | 171 | } |
164 | 172 | ||
165 | SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) | 173 | SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) |
166 | { | 174 | { |
167 | struct path path; | 175 | struct kstatfs st; |
168 | long error; | 176 | int error; |
169 | |||
170 | if (sz != sizeof(*buf)) | 177 | if (sz != sizeof(*buf)) |
171 | return -EINVAL; | 178 | return -EINVAL; |
172 | error = user_path(pathname, &path); | 179 | error = user_statfs(pathname, &st); |
173 | if (!error) { | 180 | if (!error) |
174 | struct statfs64 tmp; | 181 | error = do_statfs64(&st, buf); |
175 | error = do_statfs64(&path, &tmp); | ||
176 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
177 | error = -EFAULT; | ||
178 | path_put(&path); | ||
179 | } | ||
180 | return error; | 182 | return error; |
181 | } | 183 | } |
182 | 184 | ||
183 | SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) | 185 | SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) |
184 | { | 186 | { |
185 | struct file *file; | 187 | struct kstatfs st; |
186 | struct statfs tmp; | 188 | int error = fd_statfs(fd, &st); |
187 | int error; | 189 | if (!error) |
188 | 190 | error = do_statfs_native(&st, buf); | |
189 | error = -EBADF; | ||
190 | file = fget(fd); | ||
191 | if (!file) | ||
192 | goto out; | ||
193 | error = do_statfs_native(&file->f_path, &tmp); | ||
194 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
195 | error = -EFAULT; | ||
196 | fput(file); | ||
197 | out: | ||
198 | return error; | 191 | return error; |
199 | } | 192 | } |
200 | 193 | ||
201 | SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) | 194 | SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) |
202 | { | 195 | { |
203 | struct file *file; | 196 | struct kstatfs st; |
204 | struct statfs64 tmp; | ||
205 | int error; | 197 | int error; |
206 | 198 | ||
207 | if (sz != sizeof(*buf)) | 199 | if (sz != sizeof(*buf)) |
208 | return -EINVAL; | 200 | return -EINVAL; |
209 | 201 | ||
210 | error = -EBADF; | 202 | error = fd_statfs(fd, &st); |
211 | file = fget(fd); | 203 | if (!error) |
212 | if (!file) | 204 | error = do_statfs64(&st, buf); |
213 | goto out; | ||
214 | error = do_statfs64(&file->f_path, &tmp); | ||
215 | if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) | ||
216 | error = -EFAULT; | ||
217 | fput(file); | ||
218 | out: | ||
219 | return error; | 205 | return error; |
220 | } | 206 | } |
221 | 207 | ||
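The statfs.c rewrite above splits the work: user_statfs()/fd_statfs() resolve the path or descriptor and fill a struct kstatfs, while do_statfs_native()/do_statfs64() now take the __user pointer and do the copy_to_user() themselves. Any future in-kernel caller would follow the same two-step shape; a hypothetical example (the helper and its purpose are made up for illustration):

	#include <linux/fs.h>
	#include <linux/statfs.h>

	/* Hypothetical helper: free bytes on the filesystem backing an fd. */
	static long example_free_bytes(unsigned int fd, u64 *out)
	{
		struct kstatfs st;
		int error = fd_statfs(fd, &st);

		if (error)
			return error;
		*out = (u64)st.f_bavail * st.f_bsize;
		return 0;
	}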
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 14f64b689d7..7217d67a80a 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, | |||
522 | ubifs_assert(mutex_is_locked(&dir->i_mutex)); | 522 | ubifs_assert(mutex_is_locked(&dir->i_mutex)); |
523 | ubifs_assert(mutex_is_locked(&inode->i_mutex)); | 523 | ubifs_assert(mutex_is_locked(&inode->i_mutex)); |
524 | 524 | ||
525 | /* | ||
526 | * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing | ||
527 | * otherwise has the potential to corrupt the orphan inode list. | ||
528 | * | ||
529 | * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and | ||
530 | * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not | ||
531 | * lock 'dirA->i_mutex', so this is possible. Both of the functions | ||
532 | * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes | ||
533 | * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this | ||
534 | * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA' | ||
535 | * to the list of orphans. After this, 'vfs_link()' will link | ||
536 | * 'dirB/fileB' to 'inodeA'. This is a problem because, for example, | ||
537 | * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode | ||
538 | * to the list of orphans. | ||
539 | */ | ||
540 | if (inode->i_nlink == 0) | ||
541 | return -ENOENT; | ||
542 | |||
543 | err = dbg_check_synced_i_size(inode); | 525 | err = dbg_check_synced_i_size(inode); |
544 | if (err) | 526 | if (err) |
545 | return err; | 527 | return err; |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index b7c338d5e9d..f1dce848ef9 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -1286,8 +1286,13 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, | |||
1286 | struct fid *fid = (struct fid *)fh; | 1286 | struct fid *fid = (struct fid *)fh; |
1287 | int type = FILEID_UDF_WITHOUT_PARENT; | 1287 | int type = FILEID_UDF_WITHOUT_PARENT; |
1288 | 1288 | ||
1289 | if (len < 3 || (connectable && len < 5)) | 1289 | if (connectable && (len < 5)) { |
1290 | *lenp = 5; | ||
1291 | return 255; | ||
1292 | } else if (len < 3) { | ||
1293 | *lenp = 3; | ||
1290 | return 255; | 1294 | return 255; |
1295 | } | ||
1291 | 1296 | ||
1292 | *lenp = 3; | 1297 | *lenp = 3; |
1293 | fid->udf.block = location.logicalBlockNum; | 1298 | fid->udf.block = location.logicalBlockNum; |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index ac1c7e8378d..f83a4c830a6 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -2022,11 +2022,12 @@ xfs_buf_init(void) | |||
2022 | if (!xfslogd_workqueue) | 2022 | if (!xfslogd_workqueue) |
2023 | goto out_free_buf_zone; | 2023 | goto out_free_buf_zone; |
2024 | 2024 | ||
2025 | xfsdatad_workqueue = create_workqueue("xfsdatad"); | 2025 | xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1); |
2026 | if (!xfsdatad_workqueue) | 2026 | if (!xfsdatad_workqueue) |
2027 | goto out_destroy_xfslogd_workqueue; | 2027 | goto out_destroy_xfslogd_workqueue; |
2028 | 2028 | ||
2029 | xfsconvertd_workqueue = create_workqueue("xfsconvertd"); | 2029 | xfsconvertd_workqueue = alloc_workqueue("xfsconvertd", |
2030 | WQ_MEM_RECLAIM, 1); | ||
2030 | if (!xfsconvertd_workqueue) | 2031 | if (!xfsconvertd_workqueue) |
2031 | goto out_destroy_xfsdatad_workqueue; | 2032 | goto out_destroy_xfsdatad_workqueue; |
2032 | 2033 | ||
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index fc0114da7fd..f4f878fc008 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c | |||
@@ -89,8 +89,10 @@ xfs_fs_encode_fh( | |||
89 | * seven combinations work. The real answer is "don't use v2". | 89 | * seven combinations work. The real answer is "don't use v2". |
90 | */ | 90 | */ |
91 | len = xfs_fileid_length(fileid_type); | 91 | len = xfs_fileid_length(fileid_type); |
92 | if (*max_len < len) | 92 | if (*max_len < len) { |
93 | *max_len = len; | ||
93 | return 255; | 94 | return 255; |
95 | } | ||
94 | *max_len = len; | 96 | *max_len = len; |
95 | 97 | ||
96 | switch (fileid_type) { | 98 | switch (fileid_type) { |
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index edfa178bafb..4aff5639573 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c | |||
@@ -309,7 +309,7 @@ xfs_mru_cache_init(void) | |||
309 | if (!xfs_mru_elem_zone) | 309 | if (!xfs_mru_elem_zone) |
310 | goto out; | 310 | goto out; |
311 | 311 | ||
312 | xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); | 312 | xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1); |
313 | if (!xfs_mru_reap_wq) | 313 | if (!xfs_mru_reap_wq) |
314 | goto out_destroy_mru_elem_zone; | 314 | goto out_destroy_mru_elem_zone; |
315 | 315 | ||