author		Tejun Heo <tj@kernel.org>	2011-05-24 03:59:36 -0400
committer	Tejun Heo <tj@kernel.org>	2011-05-24 03:59:36 -0400
commit		6988f20fe04e9ef3aea488cb8ab57fbeb78e12f0
tree		c9d7fc50a2e2147a5ca07e3096e7eeb916ad2da9 /fs
parent		0415b00d175e0d8945e6785aad21b5f157976ce0
parent		6ea0c34dac89611126455537552cffe6c7e832ad
Merge branch 'fixes-2.6.39' into for-2.6.40
Diffstat (limited to 'fs')
55 files changed, 2648 insertions, 1118 deletions
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 54f923792728..475f9c597cb7 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -61,8 +61,6 @@ do { \
 		current->pid, __func__, ##args);	\
 } while (0)
 
-extern spinlock_t autofs4_lock;
-
 /* Unified info structure. This is pointed to by both the dentry and
    inode structures. Each file in the filesystem has an instance of this
    structure. It holds a reference to the dentry, so dentries are never
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 1442da4860e5..509fe1eb66ae 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -372,6 +372,10 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
 		return -EBUSY;
 	} else {
 		struct file *pipe = fget(pipefd);
+		if (!pipe) {
+			err = -EBADF;
+			goto out;
+		}
 		if (!pipe->f_op || !pipe->f_op->write) {
 			err = -EPIPE;
 			fput(pipe);
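The four added lines close a NULL-pointer dereference: fget() returns NULL for a descriptor that is not open, and the old code went straight to pipe->f_op. A minimal userspace analogue of the same validate-then-use pattern, using only POSIX calls (set_pipe() is an invented name, not from the kernel tree):

    #include <stdio.h>
    #include <errno.h>

    /* Mirrors the fix: resolve the descriptor first, fail with EBADF if
     * the lookup comes back empty, and only then touch the handle. */
    static int set_pipe(int pipefd)
    {
        FILE *pipe = fdopen(pipefd, "w");    /* analogue of fget() */

        if (!pipe)
            return -EBADF;                   /* the check that was missing */
        if (fputs("ready\n", pipe) == EOF) { /* analogue of the ->write check */
            fclose(pipe);                    /* analogue of fput() */
            return -EPIPE;
        }
        fclose(pipe);
        return 0;
    }

    int main(void)
    {
        return set_pipe(1) ? 1 : 0;          /* stdout: a valid, writable fd */
    }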
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index f43100b9662b..450f529a4eae 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -87,18 +87,70 @@ done:
 }
 
 /*
+ * Calculate and dget next entry in the subdirs list under root.
+ */
+static struct dentry *get_next_positive_subdir(struct dentry *prev,
+						struct dentry *root)
+{
+	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
+	struct list_head *next;
+	struct dentry *p, *q;
+
+	spin_lock(&sbi->lookup_lock);
+
+	if (prev == NULL) {
+		spin_lock(&root->d_lock);
+		prev = dget_dlock(root);
+		next = prev->d_subdirs.next;
+		p = prev;
+		goto start;
+	}
+
+	p = prev;
+	spin_lock(&p->d_lock);
+again:
+	next = p->d_u.d_child.next;
+start:
+	if (next == &root->d_subdirs) {
+		spin_unlock(&p->d_lock);
+		spin_unlock(&sbi->lookup_lock);
+		dput(prev);
+		return NULL;
+	}
+
+	q = list_entry(next, struct dentry, d_u.d_child);
+
+	spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
+	/* Negative dentry - try next */
+	if (!simple_positive(q)) {
+		spin_unlock(&p->d_lock);
+		p = q;
+		goto again;
+	}
+	dget_dlock(q);
+	spin_unlock(&q->d_lock);
+	spin_unlock(&p->d_lock);
+	spin_unlock(&sbi->lookup_lock);
+
+	dput(prev);
+
+	return q;
+}
+
+/*
  * Calculate and dget next entry in top down tree traversal.
  */
 static struct dentry *get_next_positive_dentry(struct dentry *prev,
 					       struct dentry *root)
 {
+	struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
 	struct list_head *next;
 	struct dentry *p, *ret;
 
 	if (prev == NULL)
 		return dget(root);
 
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 relock:
 	p = prev;
 	spin_lock(&p->d_lock);
@@ -110,7 +162,7 @@ again:
 
 	if (p == root) {
 		spin_unlock(&p->d_lock);
-		spin_unlock(&autofs4_lock);
+		spin_unlock(&sbi->lookup_lock);
 		dput(prev);
 		return NULL;
 	}
@@ -140,7 +192,7 @@ again:
 	dget_dlock(ret);
 	spin_unlock(&ret->d_lock);
 	spin_unlock(&p->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	dput(prev);
 
@@ -290,11 +342,8 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 	spin_lock(&sbi->fs_lock);
 	ino = autofs4_dentry_ino(root);
 	/* No point expiring a pending mount */
-	if (ino->flags & AUTOFS_INF_PENDING) {
-		spin_unlock(&sbi->fs_lock);
-		return NULL;
-	}
-	managed_dentry_set_transit(root);
+	if (ino->flags & AUTOFS_INF_PENDING)
+		goto out;
 	if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
 		struct autofs_info *ino = autofs4_dentry_ino(root);
 		ino->flags |= AUTOFS_INF_EXPIRING;
@@ -302,7 +351,7 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
 		spin_unlock(&sbi->fs_lock);
 		return root;
 	}
-	managed_dentry_clear_transit(root);
+out:
 	spin_unlock(&sbi->fs_lock);
 	dput(root);
 
@@ -336,13 +385,12 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 	timeout = sbi->exp_timeout;
 
 	dentry = NULL;
-	while ((dentry = get_next_positive_dentry(dentry, root))) {
+	while ((dentry = get_next_positive_subdir(dentry, root))) {
 		spin_lock(&sbi->fs_lock);
 		ino = autofs4_dentry_ino(dentry);
 		/* No point expiring a pending mount */
 		if (ino->flags & AUTOFS_INF_PENDING)
-			goto cont;
-		managed_dentry_set_transit(dentry);
+			goto next;
 
 		/*
 		 * Case 1: (i) indirect mount or top level pseudo direct mount
@@ -402,8 +450,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
 		}
 	}
 next:
-		managed_dentry_clear_transit(dentry);
-cont:
 		spin_unlock(&sbi->fs_lock);
 	}
 	return NULL;
@@ -415,13 +461,13 @@ found:
 	ino->flags |= AUTOFS_INF_EXPIRING;
 	init_completion(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 	spin_lock(&expired->d_parent->d_lock);
 	spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
 	list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
 	spin_unlock(&expired->d_lock);
 	spin_unlock(&expired->d_parent->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 	return expired;
 }
 
@@ -484,8 +530,6 @@ int autofs4_expire_run(struct super_block *sb,
 	spin_lock(&sbi->fs_lock);
 	ino = autofs4_dentry_ino(dentry);
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
-	if (!d_unhashed(dentry))
-		managed_dentry_clear_transit(dentry);
 	complete_all(&ino->expire_complete);
 	spin_unlock(&sbi->fs_lock);
 
@@ -513,9 +557,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
 	spin_lock(&sbi->fs_lock);
 	ino->flags &= ~AUTOFS_INF_EXPIRING;
 	spin_lock(&dentry->d_lock);
-	if (ret)
-		__managed_dentry_clear_transit(dentry);
-	else {
+	if (!ret) {
 		if ((IS_ROOT(dentry) ||
 		    (autofs_type_indirect(sbi->type) &&
 		     IS_ROOT(dentry->d_parent))) &&
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e6f84d26f4cf..96804a17bbd0 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -23,8 +23,6 @@
 
 #include "autofs_i.h"
 
-DEFINE_SPINLOCK(autofs4_lock);
-
 static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
 static int autofs4_dir_unlink(struct inode *,struct dentry *);
 static int autofs4_dir_rmdir(struct inode *,struct dentry *);
@@ -125,15 +123,15 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
 	 * autofs file system so just let the libfs routines handle
 	 * it.
 	 */
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
 		spin_unlock(&dentry->d_lock);
-		spin_unlock(&autofs4_lock);
+		spin_unlock(&sbi->lookup_lock);
 		return -ENOENT;
 	}
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 out:
 	return dcache_dir_open(inode, file);
@@ -171,7 +169,6 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->active_list;
 	list_for_each(p, head) {
@@ -204,14 +201,12 @@ static struct dentry *autofs4_lookup_active(struct dentry *dentry)
 			dget_dlock(active);
 			spin_unlock(&active->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&autofs4_lock);
 			return active;
 		}
 next:
 		spin_unlock(&active->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -226,7 +221,6 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 	const unsigned char *str = name->name;
 	struct list_head *p, *head;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	head = &sbi->expiring_list;
 	list_for_each(p, head) {
@@ -259,14 +253,12 @@ static struct dentry *autofs4_lookup_expiring(struct dentry *dentry)
 			dget_dlock(expiring);
 			spin_unlock(&expiring->d_lock);
 			spin_unlock(&sbi->lookup_lock);
-			spin_unlock(&autofs4_lock);
 			return expiring;
 		}
 next:
 		spin_unlock(&expiring->d_lock);
 	}
 	spin_unlock(&sbi->lookup_lock);
-	spin_unlock(&autofs4_lock);
 
 	return NULL;
 }
@@ -275,17 +267,16 @@ static int autofs4_mount_wait(struct dentry *dentry)
 {
 	struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
 	struct autofs_info *ino = autofs4_dentry_ino(dentry);
-	int status;
+	int status = 0;
 
 	if (ino->flags & AUTOFS_INF_PENDING) {
 		DPRINTK("waiting for mount name=%.*s",
 			dentry->d_name.len, dentry->d_name.name);
 		status = autofs4_wait(sbi, dentry, NFY_MOUNT);
 		DPRINTK("mount wait done status=%d", status);
-		ino->last_used = jiffies;
-		return status;
 	}
-	return 0;
+	ino->last_used = jiffies;
+	return status;
 }
 
 static int do_expire_wait(struct dentry *dentry)
@@ -319,9 +310,12 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
 	 */
 	if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
 		struct dentry *parent = dentry->d_parent;
+		struct autofs_info *ino;
 		struct dentry *new = d_lookup(parent, &dentry->d_name);
 		if (!new)
 			return NULL;
+		ino = autofs4_dentry_ino(new);
+		ino->last_used = jiffies;
 		dput(path->dentry);
 		path->dentry = new;
 	}
@@ -338,18 +332,6 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 	DPRINTK("dentry=%p %.*s",
 		dentry, dentry->d_name.len, dentry->d_name.name);
 
-	/*
-	 * Someone may have manually umounted this or it was a submount
-	 * that has gone away.
-	 */
-	spin_lock(&dentry->d_lock);
-	if (!d_mountpoint(dentry) && list_empty(&dentry->d_subdirs)) {
-		if (!(dentry->d_flags & DCACHE_MANAGE_TRANSIT) &&
-		    (dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
-			__managed_dentry_set_transit(path->dentry);
-	}
-	spin_unlock(&dentry->d_lock);
-
 	/* The daemon never triggers a mount. */
 	if (autofs4_oz_mode(sbi))
 		return NULL;
@@ -418,18 +400,17 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
 done:
 	if (!(ino->flags & AUTOFS_INF_EXPIRING)) {
 		/*
-		 * Any needed mounting has been completed and the path updated
-		 * so turn this into a normal dentry so we don't continually
-		 * call ->d_automount() and ->d_manage().
-		 */
-		spin_lock(&dentry->d_lock);
-		__managed_dentry_clear_transit(dentry);
-		/*
+		 * Any needed mounting has been completed and the path
+		 * updated so clear DCACHE_NEED_AUTOMOUNT so we don't
+		 * call ->d_automount() on rootless multi-mounts since
+		 * it can lead to an incorrect ELOOP error return.
+		 *
 		 * Only clear DMANAGED_AUTOMOUNT for rootless multi-mounts and
 		 * symlinks as in all other cases the dentry will be covered by
 		 * an actual mount so ->d_automount() won't be called during
 		 * the follow.
 		 */
+		spin_lock(&dentry->d_lock);
 		if ((!d_mountpoint(dentry) &&
 		    !list_empty(&dentry->d_subdirs)) ||
 		    (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)))
@@ -455,6 +436,8 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
 
 	/* The daemon never waits. */
 	if (autofs4_oz_mode(sbi)) {
+		if (rcu_walk)
+			return 0;
 		if (!d_mountpoint(dentry))
 			return -EISDIR;
 		return 0;
@@ -612,12 +595,12 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
 
 	dir->i_mtime = CURRENT_TIME;
 
-	spin_lock(&autofs4_lock);
-	autofs4_add_expiring(dentry);
+	spin_lock(&sbi->lookup_lock);
+	__autofs4_add_expiring(dentry);
 	spin_lock(&dentry->d_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	return 0;
 }
@@ -686,20 +669,17 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
 	if (!autofs4_oz_mode(sbi))
 		return -EACCES;
 
-	spin_lock(&autofs4_lock);
 	spin_lock(&sbi->lookup_lock);
 	spin_lock(&dentry->d_lock);
 	if (!list_empty(&dentry->d_subdirs)) {
 		spin_unlock(&dentry->d_lock);
 		spin_unlock(&sbi->lookup_lock);
-		spin_unlock(&autofs4_lock);
 		return -ENOTEMPTY;
 	}
 	__autofs4_add_expiring(dentry);
-	spin_unlock(&sbi->lookup_lock);
 	__d_drop(dentry);
 	spin_unlock(&dentry->d_lock);
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->lookup_lock);
 
 	if (sbi->version < 5)
 		autofs_clear_leaf_automount_flags(dentry);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 56010056b2e6..25435987d6ae 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -197,12 +197,12 @@ rename_retry:
 
 	seq = read_seqbegin(&rename_lock);
 	rcu_read_lock();
-	spin_lock(&autofs4_lock);
+	spin_lock(&sbi->fs_lock);
 	for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
 		len += tmp->d_name.len + 1;
 
 	if (!len || --len > NAME_MAX) {
-		spin_unlock(&autofs4_lock);
+		spin_unlock(&sbi->fs_lock);
 		rcu_read_unlock();
 		if (read_seqretry(&rename_lock, seq))
 			goto rename_retry;
@@ -218,7 +218,7 @@ rename_retry:
 		p -= tmp->d_name.len;
 		strncpy(p, tmp->d_name.name, tmp->d_name.len);
 	}
-	spin_unlock(&autofs4_lock);
+	spin_unlock(&sbi->fs_lock);
 	rcu_read_unlock();
 	if (read_seqretry(&rename_lock, seq))
 		goto rename_retry;
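Taken together, the autofs4 hunks retire the single global autofs4_lock in favour of per-super-block locks (sbi->lookup_lock here, sbi->fs_lock in waitq.c), so two autofs mounts no longer serialize against each other. A hedged pthread sketch of the shape of that change (all identifiers invented for illustration; link with -lpthread):

    #include <pthread.h>

    /* Before: one lock shared by every mount instance. */
    static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;

    /* After: each instance carries its own lock, so operations on
     * different instances can proceed in parallel. */
    struct sb_info {
        pthread_mutex_t lookup_lock;
        /* ... per-mount lists such as active_list, expiring_list ... */
    };

    static void lookup_locked_op_old(struct sb_info *sbi)
    {
        (void)sbi;
        pthread_mutex_lock(&global_lock);    /* every mount contends here */
        pthread_mutex_unlock(&global_lock);
    }

    static void lookup_locked_op(struct sb_info *sbi)
    {
        pthread_mutex_lock(&sbi->lookup_lock);   /* was: &global_lock */
        /* walk this mount's lists only */
        pthread_mutex_unlock(&sbi->lookup_lock);
    }

    int main(void)
    {
        struct sb_info sbi;

        pthread_mutex_init(&sbi.lookup_lock, NULL);
        lookup_locked_op_old(&sbi);
        lookup_locked_op(&sbi);
        pthread_mutex_destroy(&sbi.lookup_lock);
        return 0;
    }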
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 7d02afb2b7f4..c1511c674f53 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -55,11 +55,13 @@ EXPORT_SYMBOL(I_BDEV);
 static void bdev_inode_switch_bdi(struct inode *inode,
 			struct backing_dev_info *dst)
 {
-	spin_lock(&inode_lock);
+	spin_lock(&inode_wb_list_lock);
+	spin_lock(&inode->i_lock);
 	inode->i_data.backing_dev_info = dst;
 	if (inode->i_state & I_DIRTY)
 		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&inode_wb_list_lock);
 }
 
 static sector_t max_block(struct block_device *bdev)
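This hunk is part of the 2.6.39 work that breaks up the global inode_lock: the writeback list gets its own inode_wb_list_lock and the inode's state its own inode->i_lock, always taken in that order. A hedged pthread sketch of the nesting rule (identifiers invented; link with -lpthread):

    #include <pthread.h>

    /* List lock first, then the per-object lock, in the same order on
     * every path, so the two locks can never deadlock against each other. */
    static pthread_mutex_t wb_list_lock = PTHREAD_MUTEX_INITIALIZER;

    struct obj {
        pthread_mutex_t lock;   /* plays the role of inode->i_lock */
        int dirty;
        struct obj *wb_next;    /* membership in the writeback list */
    };

    static void move_to_list(struct obj *o, struct obj **list)
    {
        pthread_mutex_lock(&wb_list_lock);  /* outer: protects the list */
        pthread_mutex_lock(&o->lock);       /* inner: protects o's state */
        if (o->dirty) {
            o->wb_next = *list;
            *list = o;
        }
        pthread_mutex_unlock(&o->lock);
        pthread_mutex_unlock(&wb_list_lock);
    }

    int main(void)
    {
        struct obj o = { PTHREAD_MUTEX_INITIALIZER, 1, 0 };
        struct obj *list = 0;

        move_to_list(&o, &list);
        return list == &o ? 0 : 1;
    }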
diff --git a/fs/buffer.c b/fs/buffer.c
index 2e6b1a387b7e..a08bb8e61c6f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1138,7 +1138,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
  * inode list.
  *
  * mark_buffer_dirty() is atomic.  It takes bh->b_page->mapping->private_lock,
- * mapping->tree_lock and the global inode_lock.
+ * mapping->tree_lock and mapping->host->i_lock.
  */
 void mark_buffer_dirty(struct buffer_head *bh)
 {
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index 06d27a41807f..af56ad56a89a 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -61,4 +61,13 @@ void coda_sysctl_clean(void)
 		fs_table_header = NULL;
 	}
 }
+
+#else
+void coda_sysctl_init(void)
+{
+}
+
+void coda_sysctl_clean(void)
+{
+}
 #endif
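The new #else branch gives coda_sysctl_init() and coda_sysctl_clean() empty definitions when sysctl support is compiled out, so callers need no conditional compilation of their own. The pattern in miniature, with a hypothetical config symbol standing in for the kernel's:

    #include <stdio.h>

    /* #define CONFIG_FOO_STATS 1   <- flip to build the real versions */
    #ifdef CONFIG_FOO_STATS
    void foo_stats_init(void) { puts("stats on"); }
    void foo_stats_exit(void) { puts("stats off"); }
    #else
    /* Stubs, as in the coda hunk: callers stay #ifdef-free, the linker
     * sees no undefined references, and the optimizer drops the calls. */
    static inline void foo_stats_init(void) { }
    static inline void foo_stats_exit(void) { }
    #endif

    int main(void)
    {
        foo_stats_init();   /* safe either way */
        foo_stats_exit();
        return 0;
    }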
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 816f88e6b9ce..98b77c89494c 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -8,6 +8,7 @@
 #include <linux/writeback.h>
 #include <linux/sysctl.h>
 #include <linux/gfp.h>
+#include "internal.h"
 
 /* A global variable is a bit ugly, but it keeps the code simple */
 int sysctl_drop_caches;
@@ -16,20 +17,23 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 {
 	struct inode *inode, *toput_inode = NULL;
 
-	spin_lock(&inode_lock);
+	spin_lock(&inode_sb_list_lock);
 	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
-		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
-			continue;
-		if (inode->i_mapping->nrpages == 0)
+		spin_lock(&inode->i_lock);
+		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+		    (inode->i_mapping->nrpages == 0)) {
+			spin_unlock(&inode->i_lock);
 			continue;
+		}
 		__iget(inode);
-		spin_unlock(&inode_lock);
+		spin_unlock(&inode->i_lock);
+		spin_unlock(&inode_sb_list_lock);
 		invalidate_mapping_pages(inode->i_mapping, 0, -1);
 		iput(toput_inode);
 		toput_inode = inode;
-		spin_lock(&inode_lock);
+		spin_lock(&inode_sb_list_lock);
 	}
-	spin_unlock(&inode_lock);
+	spin_unlock(&inode_sb_list_lock);
 	iput(toput_inode);
 }
 
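Note the shape of the converted loop: i_lock is taken just long enough to test state and pin the inode with __iget(), both locks are dropped before the expensive invalidate_mapping_pages() call, and the previous inode is only released once it is no longer needed to keep the list position valid. A hedged single-threaded model of that "pin, unlock, work, relock" idiom (refcounts would be atomic in real concurrent code; names invented):

    #include <pthread.h>

    struct entry {
        struct entry *next;
        int refcount;
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void expensive_work(struct entry *e) { (void)e; /* ... */ }

    static void walk(struct entry *head)
    {
        struct entry *e, *toput = 0;

        pthread_mutex_lock(&list_lock);
        for (e = head; e; e = e->next) {
            e->refcount++;                  /* pin: entry can't vanish */
            pthread_mutex_unlock(&list_lock);
            expensive_work(e);              /* no lock held here */
            if (toput)
                toput->refcount--;          /* drop the previous pin */
            toput = e;
            pthread_mutex_lock(&list_lock); /* e->next still valid: pinned */
        }
        pthread_mutex_unlock(&list_lock);
        if (toput)
            toput->refcount--;
    }

    int main(void)
    {
        struct entry c = { 0, 1 }, b = { &c, 1 }, a = { &b, 1 };

        walk(&a);
        return 0;
    }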
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index adf96b822781..97b970e7dd13 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -21,6 +21,8 @@
 #include "ext4_jbd2.h"
 #include "mballoc.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
@@ -342,6 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 		 * We do it here so the bitmap uptodate bit
 		 * get set with buffer lock held.
 		 */
+		trace_ext4_read_block_bitmap_load(sb, block_group);
 		set_bitmap_uptodate(bh);
 		if (bh_submit_read(bh) < 0) {
 			put_bh(bh);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index d8b992e658c1..e25e99bf7ee1 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -202,13 +202,6 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
 	return 1;
 }
 
-static inline void ext4_journal_release_buffer(handle_t *handle,
-						struct buffer_head *bh)
-{
-	if (ext4_handle_valid(handle))
-		jbd2_journal_release_buffer(handle, bh);
-}
-
 static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
 {
 	return ext4_journal_start_sb(inode->i_sb, nblocks);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7516fb9c0bd5..dd2cb5076ff9 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,6 +44,8 @@
 #include "ext4_jbd2.h"
 #include "ext4_extents.h"
 
+#include <trace/events/ext4.h>
+
 static int ext4_ext_truncate_extend_restart(handle_t *handle,
 					    struct inode *inode,
 					    int needed)
@@ -664,6 +666,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 		if (unlikely(!bh))
 			goto err;
 		if (!bh_uptodate_or_lock(bh)) {
+			trace_ext4_ext_load_extent(inode, block,
+						path[ppos].p_block);
 			if (bh_submit_read(bh) < 0) {
 				put_bh(bh);
 				goto err;
@@ -1034,7 +1038,7 @@ cleanup:
 		for (i = 0; i < depth; i++) {
 			if (!ablocks[i])
 				continue;
-			ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+			ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
 					 EXT4_FREE_BLOCKS_METADATA);
 		}
 	}
@@ -2059,7 +2063,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 	if (err)
 		return err;
 	ext_debug("index is empty, remove it, free block %llu\n", leaf);
-	ext4_free_blocks(handle, inode, 0, leaf, 1,
+	ext4_free_blocks(handle, inode, NULL, leaf, 1,
 			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
 	return err;
 }
@@ -2156,7 +2160,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 		num = le32_to_cpu(ex->ee_block) + ee_len - from;
 		start = ext4_ext_pblock(ex) + ee_len - num;
 		ext_debug("free last %u blocks starting %llu\n", num, start);
-		ext4_free_blocks(handle, inode, 0, start, num, flags);
+		ext4_free_blocks(handle, inode, NULL, start, num, flags);
 	} else if (from == le32_to_cpu(ex->ee_block)
 		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
 		printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -3108,14 +3112,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 {
 	int i, depth;
 	struct ext4_extent_header *eh;
-	struct ext4_extent *ex, *last_ex;
+	struct ext4_extent *last_ex;
 
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
 		return 0;
 
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
-	ex = path[depth].p_ext;
 
 	if (unlikely(!eh->eh_entries)) {
 		EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
@@ -3295,9 +3298,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			struct ext4_map_blocks *map, int flags)
 {
 	struct ext4_ext_path *path = NULL;
-	struct ext4_extent_header *eh;
 	struct ext4_extent newex, *ex;
-	ext4_fsblk_t newblock;
+	ext4_fsblk_t newblock = 0;
 	int err = 0, depth, ret;
 	unsigned int allocated = 0;
 	struct ext4_allocation_request ar;
@@ -3305,6 +3307,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
 		  map->m_lblk, map->m_len, inode->i_ino);
+	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
 	/* check in cache */
 	if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
@@ -3352,7 +3355,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		err = -EIO;
 		goto out2;
 	}
-	eh = path[depth].p_hdr;
 
 	ex = path[depth].p_ext;
 	if (ex) {
@@ -3485,7 +3487,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		/* not a good idea to call discard here directly,
 		 * but otherwise we'd need to call it every free() */
 		ext4_discard_preallocations(inode);
-		ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
+		ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
 				 ext4_ext_get_actual_len(&newex), 0);
 		goto out2;
 	}
@@ -3525,6 +3527,8 @@ out2:
 		ext4_ext_drop_refs(path);
 		kfree(path);
 	}
+	trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
+		newblock, map->m_len, err ? err : allocated);
 	return err ? err : allocated;
 }
 
@@ -3658,6 +3662,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return -EOPNOTSUPP;
 
+	trace_ext4_fallocate_enter(inode, offset, len, mode);
 	map.m_lblk = offset >> blkbits;
 	/*
 	 * We can't just convert len to max_blocks because
@@ -3673,6 +3678,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	ret = inode_newsize_ok(inode, (len + offset));
 	if (ret) {
 		mutex_unlock(&inode->i_mutex);
+		trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
 		return ret;
 	}
 retry:
@@ -3717,6 +3723,8 @@ retry:
 		goto retry;
 	}
 	mutex_unlock(&inode->i_mutex);
+	trace_ext4_fallocate_exit(inode, offset, max_blocks,
+				ret > 0 ? ret2 : ret);
 	return ret > 0 ? ret2 : ret;
 }
 
@@ -3775,6 +3783,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 	}
 	return ret > 0 ? ret2 : ret;
 }
+
 /*
  * Callback function called for each extent to gather FIEMAP information.
  */
@@ -3782,38 +3791,162 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 		       struct ext4_ext_cache *newex, struct ext4_extent *ex,
 		       void *data)
 {
-	struct fiemap_extent_info *fieinfo = data;
-	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
 	__u64	logical;
 	__u64	physical;
 	__u64	length;
+	loff_t	size;
 	__u32	flags = 0;
-	int	error;
+	int		ret = 0;
+	struct fiemap_extent_info *fieinfo = data;
+	unsigned char blksize_bits;
 
-	logical = (__u64)newex->ec_block << blksize_bits;
+	blksize_bits = inode->i_sb->s_blocksize_bits;
+	logical = (__u64)newex->ec_block << blksize_bits;
 
 	if (newex->ec_start == 0) {
-		pgoff_t offset;
-		struct page *page;
+		/*
+		 * No extent in extent-tree contains block @newex->ec_start,
+		 * then the block may stay in 1)a hole or 2)delayed-extent.
+		 *
+		 * Holes or delayed-extents are processed as follows.
+		 * 1. lookup dirty pages with specified range in pagecache.
+		 *    If no page is got, then there is no delayed-extent and
+		 *    return with EXT_CONTINUE.
+		 * 2. find the 1st mapped buffer,
+		 * 3. check if the mapped buffer is both in the request range
+		 *    and a delayed buffer. If not, there is no delayed-extent,
+		 *    then return.
+		 * 4. a delayed-extent is found, the extent will be collected.
+		 */
+		ext4_lblk_t	end = 0;
+		pgoff_t		last_offset;
+		pgoff_t		offset;
+		pgoff_t		index;
+		struct page	**pages = NULL;
 		struct buffer_head *bh = NULL;
+		struct buffer_head *head = NULL;
+		unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
+
+		pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (pages == NULL)
+			return -ENOMEM;
 
 		offset = logical >> PAGE_SHIFT;
-		page = find_get_page(inode->i_mapping, offset);
-		if (!page || !page_has_buffers(page))
-			return EXT_CONTINUE;
+repeat:
+		last_offset = offset;
+		head = NULL;
+		ret = find_get_pages_tag(inode->i_mapping, &offset,
+					PAGECACHE_TAG_DIRTY, nr_pages, pages);
+
+		if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+			/* First time, try to find a mapped buffer. */
+			if (ret == 0) {
+out:
+				for (index = 0; index < ret; index++)
+					page_cache_release(pages[index]);
+				/* just a hole. */
+				kfree(pages);
+				return EXT_CONTINUE;
+			}
 
-		bh = page_buffers(page);
+			/* Try to find the 1st mapped buffer. */
+			end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
+				  blksize_bits;
+			if (!page_has_buffers(pages[0]))
+				goto out;
+			head = page_buffers(pages[0]);
+			if (!head)
+				goto out;
 
-		if (!bh)
-			return EXT_CONTINUE;
+			bh = head;
+			do {
+				if (buffer_mapped(bh)) {
+					/* get the 1st mapped buffer. */
+					if (end > newex->ec_block +
+						newex->ec_len)
+						/* The buffer is out of
+						 * the request range.
+						 */
+						goto out;
+					goto found_mapped_buffer;
+				}
+				bh = bh->b_this_page;
+				end++;
+			} while (bh != head);
 
-		if (buffer_delay(bh)) {
-			flags |= FIEMAP_EXTENT_DELALLOC;
-			page_cache_release(page);
+			/* No mapped buffer found. */
+			goto out;
 		} else {
-			page_cache_release(page);
-			return EXT_CONTINUE;
+			/*Find contiguous delayed buffers. */
+			if (ret > 0 && pages[0]->index == last_offset)
+				head = page_buffers(pages[0]);
+			bh = head;
 		}
+
+found_mapped_buffer:
+		if (bh != NULL && buffer_delay(bh)) {
+			/* 1st or contiguous delayed buffer found. */
+			if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+				/*
+				 * 1st delayed buffer found, record
+				 * the start of extent.
+				 */
+				flags |= FIEMAP_EXTENT_DELALLOC;
+				newex->ec_block = end;
+				logical = (__u64)end << blksize_bits;
+			}
+			/* Find contiguous delayed buffers. */
+			do {
+				if (!buffer_delay(bh))
+					goto found_delayed_extent;
+				bh = bh->b_this_page;
+				end++;
+			} while (bh != head);
+
+			for (index = 1; index < ret; index++) {
+				if (!page_has_buffers(pages[index])) {
+					bh = NULL;
+					break;
+				}
+				head = page_buffers(pages[index]);
+				if (!head) {
+					bh = NULL;
+					break;
+				}
+				if (pages[index]->index !=
+					pages[0]->index + index) {
+					/* Blocks are not contiguous. */
+					bh = NULL;
+					break;
+				}
+				bh = head;
+				do {
+					if (!buffer_delay(bh))
+						/* Delayed-extent ends. */
+						goto found_delayed_extent;
+					bh = bh->b_this_page;
+					end++;
+				} while (bh != head);
+			}
+		} else if (!(flags & FIEMAP_EXTENT_DELALLOC))
+			/* a hole found. */
+			goto out;
+
+found_delayed_extent:
+		newex->ec_len = min(end - newex->ec_block,
+						(ext4_lblk_t)EXT_INIT_MAX_LEN);
+		if (ret == nr_pages && bh != NULL &&
+			newex->ec_len < EXT_INIT_MAX_LEN &&
+			buffer_delay(bh)) {
+			/* Have not collected an extent and continue. */
+			for (index = 0; index < ret; index++)
+				page_cache_release(pages[index]);
+			goto repeat;
+		}
+
+		for (index = 0; index < ret; index++)
+			page_cache_release(pages[index]);
+		kfree(pages);
 	}
 
 	physical = (__u64)newex->ec_start << blksize_bits;
@@ -3822,32 +3955,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 	if (ex && ext4_ext_is_uninitialized(ex))
 		flags |= FIEMAP_EXTENT_UNWRITTEN;
 
-	/*
-	 * If this extent reaches EXT_MAX_BLOCK, it must be last.
-	 *
-	 * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
-	 * this also indicates no more allocated blocks.
-	 *
-	 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
-	 */
-	if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
-	    newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
-		loff_t size = i_size_read(inode);
-		loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
-
+	size = i_size_read(inode);
+	if (logical + length >= size)
 		flags |= FIEMAP_EXTENT_LAST;
-		if ((flags & FIEMAP_EXTENT_DELALLOC) &&
-		    logical+length > size)
-			length = (size - logical + bs - 1) & ~(bs-1);
-	}
 
-	error = fiemap_fill_next_extent(fieinfo, logical, physical,
+	ret = fiemap_fill_next_extent(fieinfo, logical, physical,
 					length, flags);
-	if (error < 0)
-		return error;
-	if (error == 1)
+	if (ret < 0)
+		return ret;
+	if (ret == 1)
 		return EXT_BREAK;
-
 	return EXT_CONTINUE;
 }
 
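Most of the small extents.c hunks replace a literal 0 with NULL for ext4_free_blocks()'s struct buffer_head pointer argument: behaviour is identical, but the call now reads as a pointer and static checkers such as sparse stop warning about an integer used as a pointer. In miniature, with an invented signature (not the real ext4 one):

    #include <stddef.h>

    static void free_blocks(void *handle, void *inode, void *bh,
                            unsigned long long start, unsigned long num)
    {
        (void)handle; (void)inode; (void)bh; (void)start; (void)num;
    }

    int main(void)
    {
        /* 0 works for a pointer argument, but NULL says what is meant
         * and keeps checkers quiet about integer-as-pointer. */
        free_blocks(0, 0, NULL /* was: 0 */, 1234, 1);
        return 0;
    }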
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 7829b287822a..7f74019d6d77 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -164,20 +164,20 @@ int ext4_sync_file(struct file *file, int datasync)
 
 	J_ASSERT(ext4_journal_current_handle() == NULL);
 
-	trace_ext4_sync_file(file, datasync);
+	trace_ext4_sync_file_enter(file, datasync);
 
 	if (inode->i_sb->s_flags & MS_RDONLY)
 		return 0;
 
 	ret = ext4_flush_completed_IO(inode);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	if (!journal) {
 		ret = generic_file_fsync(file, datasync);
 		if (!ret && !list_empty(&inode->i_dentry))
 			ext4_sync_parent(inode);
-		return ret;
+		goto out;
 	}
 
 	/*
@@ -194,8 +194,10 @@ int ext4_sync_file(struct file *file, int datasync)
 	 * (they were dirtied by commit).  But that's OK - the blocks are
 	 * safe in-journal, which is all fsync() needs to ensure.
 	 */
-	if (ext4_should_journal_data(inode))
-		return ext4_force_commit(inode->i_sb);
+	if (ext4_should_journal_data(inode)) {
+		ret = ext4_force_commit(inode->i_sb);
+		goto out;
+	}
 
 	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
 	if (jbd2_log_start_commit(journal, commit_tid)) {
@@ -215,5 +217,7 @@ int ext4_sync_file(struct file *file, int datasync)
 		ret = jbd2_log_wait_commit(journal, commit_tid);
 	} else if (journal->j_flags & JBD2_BARRIER)
 		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+out:
+	trace_ext4_sync_file_exit(inode, ret);
 	return ret;
 }
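The fsync conversion replaces the early return statements with goto out so that a single exit point can fire trace_ext4_sync_file_exit() with the final status, pairing every _enter event with exactly one _exit event. A hedged C sketch of the idiom, with printf standing in for real tracepoints and invented helper names:

    #include <stdio.h>

    static void trace_enter(int datasync) { printf("enter datasync=%d\n", datasync); }
    static void trace_exit(int ret) { printf("exit ret=%d\n", ret); }

    static int do_flush(void)  { return 0; }  /* pretend work */
    static int do_commit(void) { return 0; }

    static int sync_file(int datasync)
    {
        int ret;

        trace_enter(datasync);

        ret = do_flush();
        if (ret < 0)
            goto out;   /* was: return ret - would skip trace_exit() */

        ret = do_commit();
    out:
        trace_exit(ret);    /* single exit: always paired with enter */
        return ret;
    }

    int main(void)
    {
        return sync_file(1);
    }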
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 78b79e1bd7ed..21bb2f61e502 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -152,6 +152,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 		 * We do it here so the bitmap uptodate bit
 		 * get set with buffer lock held.
 		 */
+		trace_ext4_load_inode_bitmap(sb, block_group);
 		set_bitmap_uptodate(bh);
 		if (bh_submit_read(bh) < 0) {
 			put_bh(bh);
@@ -649,7 +650,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 		*group = parent_group + flex_size;
 		if (*group > ngroups)
 			*group = 0;
-		return find_group_orlov(sb, parent, group, mode, 0);
+		return find_group_orlov(sb, parent, group, mode, NULL);
 	}
 
 	/*
@@ -1054,6 +1055,11 @@ got:
 		}
 	}
 
+	if (ext4_handle_valid(handle)) {
+		ei->i_sync_tid = handle->h_transaction->t_tid;
+		ei->i_datasync_tid = handle->h_transaction->t_tid;
+	}
+
 	err = ext4_mark_inode_dirty(handle, inode);
 	if (err) {
 		ext4_std_error(sb, err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9297ad46c465..1a86282b9024 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -173,7 +173,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, | |||
173 | BUG_ON(EXT4_JOURNAL(inode) == NULL); | 173 | BUG_ON(EXT4_JOURNAL(inode) == NULL); |
174 | jbd_debug(2, "restarting handle %p\n", handle); | 174 | jbd_debug(2, "restarting handle %p\n", handle); |
175 | up_write(&EXT4_I(inode)->i_data_sem); | 175 | up_write(&EXT4_I(inode)->i_data_sem); |
176 | ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); | 176 | ret = ext4_journal_restart(handle, nblocks); |
177 | down_write(&EXT4_I(inode)->i_data_sem); | 177 | down_write(&EXT4_I(inode)->i_data_sem); |
178 | ext4_discard_preallocations(inode); | 178 | ext4_discard_preallocations(inode); |
179 | 179 | ||
@@ -720,7 +720,7 @@ allocated: | |||
720 | return ret; | 720 | return ret; |
721 | failed_out: | 721 | failed_out: |
722 | for (i = 0; i < index; i++) | 722 | for (i = 0; i < index; i++) |
723 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); | 723 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); |
724 | return ret; | 724 | return ret; |
725 | } | 725 | } |
726 | 726 | ||
@@ -823,20 +823,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
823 | return err; | 823 | return err; |
824 | failed: | 824 | failed: |
825 | /* Allocation failed, free what we already allocated */ | 825 | /* Allocation failed, free what we already allocated */ |
826 | ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0); | 826 | ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0); |
827 | for (i = 1; i <= n ; i++) { | 827 | for (i = 1; i <= n ; i++) { |
828 | /* | 828 | /* |
829 | * branch[i].bh is newly allocated, so there is no | 829 | * branch[i].bh is newly allocated, so there is no |
830 | * need to revoke the block, which is why we don't | 830 | * need to revoke the block, which is why we don't |
831 | * need to set EXT4_FREE_BLOCKS_METADATA. | 831 | * need to set EXT4_FREE_BLOCKS_METADATA. |
832 | */ | 832 | */ |
833 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, | 833 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, |
834 | EXT4_FREE_BLOCKS_FORGET); | 834 | EXT4_FREE_BLOCKS_FORGET); |
835 | } | 835 | } |
836 | for (i = n+1; i < indirect_blks; i++) | 836 | for (i = n+1; i < indirect_blks; i++) |
837 | ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0); | 837 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0); |
838 | 838 | ||
839 | ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0); | 839 | ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0); |
840 | 840 | ||
841 | return err; | 841 | return err; |
842 | } | 842 | } |
@@ -924,7 +924,7 @@ err_out: | |||
924 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, | 924 | ext4_free_blocks(handle, inode, where[i].bh, 0, 1, |
925 | EXT4_FREE_BLOCKS_FORGET); | 925 | EXT4_FREE_BLOCKS_FORGET); |
926 | } | 926 | } |
927 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key), | 927 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), |
928 | blks, 0); | 928 | blks, 0); |
929 | 929 | ||
930 | return err; | 930 | return err; |
@@ -973,6 +973,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, | |||
973 | int count = 0; | 973 | int count = 0; |
974 | ext4_fsblk_t first_block = 0; | 974 | ext4_fsblk_t first_block = 0; |
975 | 975 | ||
976 | trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); | ||
976 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); | 977 | J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))); |
977 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); | 978 | J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0); |
978 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, | 979 | depth = ext4_block_to_path(inode, map->m_lblk, offsets, |
@@ -1058,6 +1059,8 @@ cleanup: | |||
1058 | partial--; | 1059 | partial--; |
1059 | } | 1060 | } |
1060 | out: | 1061 | out: |
1062 | trace_ext4_ind_map_blocks_exit(inode, map->m_lblk, | ||
1063 | map->m_pblk, map->m_len, err); | ||
1061 | return err; | 1064 | return err; |
1062 | } | 1065 | } |
1063 | 1066 | ||
@@ -2060,7 +2063,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2060 | if (nr_pages == 0) | 2063 | if (nr_pages == 0) |
2061 | break; | 2064 | break; |
2062 | for (i = 0; i < nr_pages; i++) { | 2065 | for (i = 0; i < nr_pages; i++) { |
2063 | int commit_write = 0, redirty_page = 0; | 2066 | int commit_write = 0, skip_page = 0; |
2064 | struct page *page = pvec.pages[i]; | 2067 | struct page *page = pvec.pages[i]; |
2065 | 2068 | ||
2066 | index = page->index; | 2069 | index = page->index; |
@@ -2086,14 +2089,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2086 | * If the page does not have buffers (for | 2089 | * If the page does not have buffers (for |
2087 | * whatever reason), try to create them using | 2090 | * whatever reason), try to create them using |
2088 | * __block_write_begin. If this fails, | 2091 | * __block_write_begin. If this fails, |
2089 | * redirty the page and move on. | 2092 | * skip the page and move on. |
2090 | */ | 2093 | */ |
2091 | if (!page_has_buffers(page)) { | 2094 | if (!page_has_buffers(page)) { |
2092 | if (__block_write_begin(page, 0, len, | 2095 | if (__block_write_begin(page, 0, len, |
2093 | noalloc_get_block_write)) { | 2096 | noalloc_get_block_write)) { |
2094 | redirty_page: | 2097 | skip_page: |
2095 | redirty_page_for_writepage(mpd->wbc, | ||
2096 | page); | ||
2097 | unlock_page(page); | 2098 | unlock_page(page); |
2098 | continue; | 2099 | continue; |
2099 | } | 2100 | } |
@@ -2104,7 +2105,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2104 | block_start = 0; | 2105 | block_start = 0; |
2105 | do { | 2106 | do { |
2106 | if (!bh) | 2107 | if (!bh) |
2107 | goto redirty_page; | 2108 | goto skip_page; |
2108 | if (map && (cur_logical >= map->m_lblk) && | 2109 | if (map && (cur_logical >= map->m_lblk) && |
2109 | (cur_logical <= (map->m_lblk + | 2110 | (cur_logical <= (map->m_lblk + |
2110 | (map->m_len - 1)))) { | 2111 | (map->m_len - 1)))) { |
@@ -2120,22 +2121,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2120 | clear_buffer_unwritten(bh); | 2121 | clear_buffer_unwritten(bh); |
2121 | } | 2122 | } |
2122 | 2123 | ||
2123 | /* redirty page if block allocation undone */ | 2124 | /* skip page if block allocation undone */ |
2124 | if (buffer_delay(bh) || buffer_unwritten(bh)) | 2125 | if (buffer_delay(bh) || buffer_unwritten(bh)) |
2125 | redirty_page = 1; | 2126 | skip_page = 1; |
2126 | bh = bh->b_this_page; | 2127 | bh = bh->b_this_page; |
2127 | block_start += bh->b_size; | 2128 | block_start += bh->b_size; |
2128 | cur_logical++; | 2129 | cur_logical++; |
2129 | pblock++; | 2130 | pblock++; |
2130 | } while (bh != page_bufs); | 2131 | } while (bh != page_bufs); |
2131 | 2132 | ||
2132 | if (redirty_page) | 2133 | if (skip_page) |
2133 | goto redirty_page; | 2134 | goto skip_page; |
2134 | 2135 | ||
2135 | if (commit_write) | 2136 | if (commit_write) |
2136 | /* mark the buffer_heads as dirty & uptodate */ | 2137 | /* mark the buffer_heads as dirty & uptodate */ |
2137 | block_commit_write(page, 0, len); | 2138 | block_commit_write(page, 0, len); |
2138 | 2139 | ||
2140 | clear_page_dirty_for_io(page); | ||
2139 | /* | 2141 | /* |
2140 | * Delalloc doesn't support data journalling, | 2142 | * Delalloc doesn't support data journalling, |
2141 | * but eventually maybe we'll lift this | 2143 | * but eventually maybe we'll lift this |
@@ -2165,8 +2167,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, | |||
2165 | return ret; | 2167 | return ret; |
2166 | } | 2168 | } |
2167 | 2169 | ||
2168 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | 2170 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) |
2169 | sector_t logical, long blk_cnt) | ||
2170 | { | 2171 | { |
2171 | int nr_pages, i; | 2172 | int nr_pages, i; |
2172 | pgoff_t index, end; | 2173 | pgoff_t index, end; |
@@ -2174,9 +2175,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | |||
2174 | struct inode *inode = mpd->inode; | 2175 | struct inode *inode = mpd->inode; |
2175 | struct address_space *mapping = inode->i_mapping; | 2176 | struct address_space *mapping = inode->i_mapping; |
2176 | 2177 | ||
2177 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | 2178 | index = mpd->first_page; |
2178 | end = (logical + blk_cnt - 1) >> | 2179 | end = mpd->next_page - 1; |
2179 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2180 | while (index <= end) { | 2180 | while (index <= end) { |
2181 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | 2181 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); |
2182 | if (nr_pages == 0) | 2182 | if (nr_pages == 0) |
@@ -2279,9 +2279,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
2279 | err = blks; | 2279 | err = blks; |
2280 | /* | 2280 | /* |
2281 | * If get block returns EAGAIN or ENOSPC and there | 2281 | * If get block returns EAGAIN or ENOSPC and there |
2282 | * appears to be free blocks we will call | 2282 | * appear to be free blocks, we will just let |
2283 | * ext4_writepage() for all of the pages which will | 2283 | * mpage_da_submit_io() unlock all of the pages. |
2284 | * just redirty the pages. | ||
2285 | */ | 2284 | */ |
2286 | if (err == -EAGAIN) | 2285 | if (err == -EAGAIN) |
2287 | goto submit_io; | 2286 | goto submit_io; |
@@ -2312,8 +2311,10 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | |||
2312 | ext4_print_free_blocks(mpd->inode); | 2311 | ext4_print_free_blocks(mpd->inode); |
2313 | } | 2312 | } |
2314 | /* invalidate all the pages */ | 2313 | /* invalidate all the pages */ |
2315 | ext4_da_block_invalidatepages(mpd, next, | 2314 | ext4_da_block_invalidatepages(mpd); |
2316 | mpd->b_size >> mpd->inode->i_blkbits); | 2315 | |
2316 | /* Mark this page range as having been completed */ | ||
2317 | mpd->io_done = 1; | ||
2317 | return; | 2318 | return; |
2318 | } | 2319 | } |
2319 | BUG_ON(blks == 0); | 2320 | BUG_ON(blks == 0); |
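With the reworked ext4_da_block_invalidatepages() above, the pages to drop after a failed allocation are exactly the range the mpage_da_data structure already tracks, [first_page, next_page), so the caller no longer converts logical blocks back to page indexes. A compilable sketch of that bookkeeping (the struct and names are simplified stand-ins for the kernel types):

#include <stdio.h>

struct mpd {
	unsigned long first_page;
	unsigned long next_page;	/* one past the last page */
};

/* Invalidate every page in the accumulated range [first_page, next_page). */
static void invalidate_range(const struct mpd *mpd)
{
	unsigned long index;

	for (index = mpd->first_page; index < mpd->next_page; index++)
		printf("invalidate page %lu\n", index);
}

int main(void)
{
	struct mpd mpd = { .first_page = 10, .next_page = 14 };

	invalidate_range(&mpd);	/* pages 10..13 */
	return 0;
}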
@@ -2438,102 +2439,6 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | |||
2438 | } | 2439 | } |
2439 | 2440 | ||
2440 | /* | 2441 | /* |
2441 | * __mpage_da_writepage - finds extent of pages and blocks | ||
2442 | * | ||
2443 | * @page: page to consider | ||
2444 | * @wbc: not used, we just follow rules | ||
2445 | * @data: context | ||
2446 | * | ||
2447 | * The function finds extents of pages and scan them for all blocks. | ||
2448 | */ | ||
2449 | static int __mpage_da_writepage(struct page *page, | ||
2450 | struct writeback_control *wbc, | ||
2451 | struct mpage_da_data *mpd) | ||
2452 | { | ||
2453 | struct inode *inode = mpd->inode; | ||
2454 | struct buffer_head *bh, *head; | ||
2455 | sector_t logical; | ||
2456 | |||
2457 | /* | ||
2458 | * Can we merge this page to current extent? | ||
2459 | */ | ||
2460 | if (mpd->next_page != page->index) { | ||
2461 | /* | ||
2462 | * Nope, we can't. So, we map non-allocated blocks | ||
2463 | * and start IO on them | ||
2464 | */ | ||
2465 | if (mpd->next_page != mpd->first_page) { | ||
2466 | mpage_da_map_and_submit(mpd); | ||
2467 | /* | ||
2468 | * skip rest of the page in the page_vec | ||
2469 | */ | ||
2470 | redirty_page_for_writepage(wbc, page); | ||
2471 | unlock_page(page); | ||
2472 | return MPAGE_DA_EXTENT_TAIL; | ||
2473 | } | ||
2474 | |||
2475 | /* | ||
2476 | * Start next extent of pages ... | ||
2477 | */ | ||
2478 | mpd->first_page = page->index; | ||
2479 | |||
2480 | /* | ||
2481 | * ... and blocks | ||
2482 | */ | ||
2483 | mpd->b_size = 0; | ||
2484 | mpd->b_state = 0; | ||
2485 | mpd->b_blocknr = 0; | ||
2486 | } | ||
2487 | |||
2488 | mpd->next_page = page->index + 1; | ||
2489 | logical = (sector_t) page->index << | ||
2490 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
2491 | |||
2492 | if (!page_has_buffers(page)) { | ||
2493 | mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE, | ||
2494 | (1 << BH_Dirty) | (1 << BH_Uptodate)); | ||
2495 | if (mpd->io_done) | ||
2496 | return MPAGE_DA_EXTENT_TAIL; | ||
2497 | } else { | ||
2498 | /* | ||
2499 | * Page with regular buffer heads, just add all dirty ones | ||
2500 | */ | ||
2501 | head = page_buffers(page); | ||
2502 | bh = head; | ||
2503 | do { | ||
2504 | BUG_ON(buffer_locked(bh)); | ||
2505 | /* | ||
2506 | * We need to try to allocate | ||
2507 | * unmapped blocks in the same page. | ||
2508 | * Otherwise we won't make progress | ||
2509 | * with the page in ext4_writepage | ||
2510 | */ | ||
2511 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2512 | mpage_add_bh_to_extent(mpd, logical, | ||
2513 | bh->b_size, | ||
2514 | bh->b_state); | ||
2515 | if (mpd->io_done) | ||
2516 | return MPAGE_DA_EXTENT_TAIL; | ||
2517 | } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { | ||
2518 | /* | ||
2519 | * mapped dirty buffer. We need to update | ||
2520 | * the b_state because we look at | ||
2521 | * b_state in mpage_da_map_blocks. We don't | ||
2522 | * update b_size because if we find an | ||
2523 | * unmapped buffer_head later we need to | ||
2524 | * use the b_state flag of that buffer_head. | ||
2525 | */ | ||
2526 | if (mpd->b_size == 0) | ||
2527 | mpd->b_state = bh->b_state & BH_FLAGS; | ||
2528 | } | ||
2529 | logical++; | ||
2530 | } while ((bh = bh->b_this_page) != head); | ||
2531 | } | ||
2532 | |||
2533 | return 0; | ||
2534 | } | ||
2535 | |||
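Deleting __mpage_da_writepage() means the per-page extent logic now runs inline in write_cache_pages_da() instead of behind a callback, so the first_page/next_page state never crosses a function boundary. A compilable sketch of the resulting shape (the page list and submit step are simplified placeholders):

#include <stdio.h>

struct extent {
	int first;
	int next;
};

/* One loop that both walks the dirty pages and accumulates a
 * contiguous extent, flushing whenever the next page can't merge. */
static void write_pages(const int *pages, int n)
{
	struct extent e = { -1, -1 };
	int i;

	for (i = 0; i < n; i++) {
		if (e.next != pages[i]) {	/* can't merge: flush */
			if (e.first >= 0)
				printf("submit [%d,%d)\n", e.first, e.next);
			e.first = pages[i];
		}
		e.next = pages[i] + 1;
	}
	if (e.first >= 0)
		printf("submit [%d,%d)\n", e.first, e.next);
}

int main(void)
{
	int pages[] = { 3, 4, 5, 9, 10 };

	write_pages(pages, 5);	/* submit [3,6) then [9,11) */
	return 0;
}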
2536 | /* | ||
2537 | * This is a special get_blocks_t callback which is used by | 2442 | * This is a special get_blocks_t callback which is used by |
2538 | * ext4_da_write_begin(). It will either return mapped block or | 2443 | * ext4_da_write_begin(). It will either return mapped block or |
2539 | * reserve space for a single block. | 2444 | * reserve space for a single block. |
@@ -2597,7 +2502,6 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2597 | * for partial write. | 2502 | * for partial write. |
2598 | */ | 2503 | */ |
2599 | set_buffer_new(bh); | 2504 | set_buffer_new(bh); |
2600 | set_buffer_mapped(bh); | ||
2601 | } | 2505 | } |
2602 | return 0; | 2506 | return 0; |
2603 | } | 2507 | } |
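The hunk above drops set_buffer_mapped() for the delayed-allocation case: the buffer is left new and delayed but deliberately unmapped, since no physical block exists until writeback allocates one. A sketch of the state distinction using plain bit flags (the flag values are illustrative, not the kernel's BH_* bits):

#include <stdio.h>

enum { BH_NEW = 1, BH_DELAY = 2, BH_MAPPED = 4 };

/* Delayed allocation: mark the buffer new and delayed, but leave it
 * unmapped until a real block is assigned at writeback time. */
static unsigned prep_da_buffer(void)
{
	return BH_NEW | BH_DELAY;	/* note: no BH_MAPPED */
}

int main(void)
{
	unsigned state = prep_da_buffer();

	printf("mapped? %s\n", (state & BH_MAPPED) ? "yes" : "no");
	return 0;
}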
@@ -2811,27 +2715,27 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2811 | 2715 | ||
2812 | /* | 2716 | /* |
2813 | * write_cache_pages_da - walk the list of dirty pages of the given | 2717 | * write_cache_pages_da - walk the list of dirty pages of the given |
2814 | * address space and call the callback function (which usually writes | 2718 | * address space, accumulate the pages that need writing, and call |
2815 | * the pages). | 2719 | * mpage_da_map_and_submit() to map a single contiguous region |
2816 | * | 2720 | * and write them out. |
2817 | * This is a forked version of write_cache_pages(). Differences: | ||
2818 | * Range cyclic is ignored. | ||
2819 | * no_nrwrite_index_update is always presumed true | ||
2820 | */ | 2721 | */ |
2821 | static int write_cache_pages_da(struct address_space *mapping, | 2722 | static int write_cache_pages_da(struct address_space *mapping, |
2822 | struct writeback_control *wbc, | 2723 | struct writeback_control *wbc, |
2823 | struct mpage_da_data *mpd, | 2724 | struct mpage_da_data *mpd, |
2824 | pgoff_t *done_index) | 2725 | pgoff_t *done_index) |
2825 | { | 2726 | { |
2826 | int ret = 0; | 2727 | struct buffer_head *bh, *head; |
2827 | int done = 0; | 2728 | struct inode *inode = mapping->host; |
2828 | struct pagevec pvec; | 2729 | struct pagevec pvec; |
2829 | unsigned nr_pages; | 2730 | unsigned int nr_pages; |
2830 | pgoff_t index; | 2731 | sector_t logical; |
2831 | pgoff_t end; /* Inclusive */ | 2732 | pgoff_t index, end; |
2832 | long nr_to_write = wbc->nr_to_write; | 2733 | long nr_to_write = wbc->nr_to_write; |
2833 | int tag; | 2734 | int i, tag, ret = 0; |
2834 | 2735 | ||
2736 | memset(mpd, 0, sizeof(struct mpage_da_data)); | ||
2737 | mpd->wbc = wbc; | ||
2738 | mpd->inode = inode; | ||
2835 | pagevec_init(&pvec, 0); | 2739 | pagevec_init(&pvec, 0); |
2836 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2740 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2837 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2741 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
@@ -2842,13 +2746,11 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2842 | tag = PAGECACHE_TAG_DIRTY; | 2746 | tag = PAGECACHE_TAG_DIRTY; |
2843 | 2747 | ||
2844 | *done_index = index; | 2748 | *done_index = index; |
2845 | while (!done && (index <= end)) { | 2749 | while (index <= end) { |
2846 | int i; | ||
2847 | |||
2848 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 2750 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2849 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2751 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
2850 | if (nr_pages == 0) | 2752 | if (nr_pages == 0) |
2851 | break; | 2753 | return 0; |
2852 | 2754 | ||
2853 | for (i = 0; i < nr_pages; i++) { | 2755 | for (i = 0; i < nr_pages; i++) { |
2854 | struct page *page = pvec.pages[i]; | 2756 | struct page *page = pvec.pages[i]; |
@@ -2860,60 +2762,100 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2860 | * mapping. However, page->index will not change | 2762 | * mapping. However, page->index will not change |
2861 | * because we have a reference on the page. | 2763 | * because we have a reference on the page. |
2862 | */ | 2764 | */ |
2863 | if (page->index > end) { | 2765 | if (page->index > end) |
2864 | done = 1; | 2766 | goto out; |
2865 | break; | ||
2866 | } | ||
2867 | 2767 | ||
2868 | *done_index = page->index + 1; | 2768 | *done_index = page->index + 1; |
2869 | 2769 | ||
2770 | /* | ||
2771 | * If we can't merge this page, and we have | ||
2772 | * accumulated a contiguous region, write it | ||
2773 | */ | ||
2774 | if ((mpd->next_page != page->index) && | ||
2775 | (mpd->next_page != mpd->first_page)) { | ||
2776 | mpage_da_map_and_submit(mpd); | ||
2777 | goto ret_extent_tail; | ||
2778 | } | ||
2779 | |||
2870 | lock_page(page); | 2780 | lock_page(page); |
2871 | 2781 | ||
2872 | /* | 2782 | /* |
2873 | * Page truncated or invalidated. We can freely skip it | 2783 | * If the page is no longer dirty, or its |
2874 | * then, even for data integrity operations: the page | 2784 | * mapping no longer corresponds to inode we |
2875 | * has disappeared concurrently, so there could be no | 2785 | * are writing (which means it has been |
2876 | * real expectation of this data integrity operation | 2786 | * truncated or invalidated), or the page is |
2877 | * even if there is now a new, dirty page at the same | 2787 | * already under writeback and we are not |
2878 | * pagecache address. | 2788 | * doing a data integrity writeback, skip the page |
2879 | */ | 2789 | */ |
2880 | if (unlikely(page->mapping != mapping)) { | 2790 | if (!PageDirty(page) || |
2881 | continue_unlock: | 2791 | (PageWriteback(page) && |
2792 | (wbc->sync_mode == WB_SYNC_NONE)) || | ||
2793 | unlikely(page->mapping != mapping)) { | ||
2882 | unlock_page(page); | 2794 | unlock_page(page); |
2883 | continue; | 2795 | continue; |
2884 | } | 2796 | } |
2885 | 2797 | ||
2886 | if (!PageDirty(page)) { | 2798 | if (PageWriteback(page)) |
2887 | /* someone wrote it for us */ | 2799 | wait_on_page_writeback(page); |
2888 | goto continue_unlock; | ||
2889 | } | ||
2890 | |||
2891 | if (PageWriteback(page)) { | ||
2892 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
2893 | wait_on_page_writeback(page); | ||
2894 | else | ||
2895 | goto continue_unlock; | ||
2896 | } | ||
2897 | 2800 | ||
2898 | BUG_ON(PageWriteback(page)); | 2801 | BUG_ON(PageWriteback(page)); |
2899 | if (!clear_page_dirty_for_io(page)) | ||
2900 | goto continue_unlock; | ||
2901 | 2802 | ||
2902 | ret = __mpage_da_writepage(page, wbc, mpd); | 2803 | if (mpd->next_page != page->index) |
2903 | if (unlikely(ret)) { | 2804 | mpd->first_page = page->index; |
2904 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | 2805 | mpd->next_page = page->index + 1; |
2905 | unlock_page(page); | 2806 | logical = (sector_t) page->index << |
2906 | ret = 0; | 2807 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2907 | } else { | 2808 | |
2908 | done = 1; | 2809 | if (!page_has_buffers(page)) { |
2909 | break; | 2810 | mpage_add_bh_to_extent(mpd, logical, |
2910 | } | 2811 | PAGE_CACHE_SIZE, |
2812 | (1 << BH_Dirty) | (1 << BH_Uptodate)); | ||
2813 | if (mpd->io_done) | ||
2814 | goto ret_extent_tail; | ||
2815 | } else { | ||
2816 | /* | ||
2817 | * Page with regular buffer heads, | ||
2818 | * just add all dirty ones | ||
2819 | */ | ||
2820 | head = page_buffers(page); | ||
2821 | bh = head; | ||
2822 | do { | ||
2823 | BUG_ON(buffer_locked(bh)); | ||
2824 | /* | ||
2825 | * We need to try to allocate | ||
2826 | * unmapped blocks in the same page. | ||
2827 | * Otherwise we won't make progress | ||
2828 | * with the page in ext4_writepage | ||
2829 | */ | ||
2830 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2831 | mpage_add_bh_to_extent(mpd, logical, | ||
2832 | bh->b_size, | ||
2833 | bh->b_state); | ||
2834 | if (mpd->io_done) | ||
2835 | goto ret_extent_tail; | ||
2836 | } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { | ||
2837 | /* | ||
2838 | * mapped dirty buffer. We need | ||
2839 | * to update the b_state | ||
2840 | * because we look at b_state | ||
2841 | * in mpage_da_map_blocks. We | ||
2842 | * don't update b_size because | ||
2843 | * if we find an unmapped | ||
2844 | * buffer_head later we need to | ||
2845 | * use the b_state flag of that | ||
2846 | * buffer_head. | ||
2847 | */ | ||
2848 | if (mpd->b_size == 0) | ||
2849 | mpd->b_state = bh->b_state & BH_FLAGS; | ||
2850 | } | ||
2851 | logical++; | ||
2852 | } while ((bh = bh->b_this_page) != head); | ||
2911 | } | 2853 | } |
2912 | 2854 | ||
2913 | if (nr_to_write > 0) { | 2855 | if (nr_to_write > 0) { |
2914 | nr_to_write--; | 2856 | nr_to_write--; |
2915 | if (nr_to_write == 0 && | 2857 | if (nr_to_write == 0 && |
2916 | wbc->sync_mode == WB_SYNC_NONE) { | 2858 | wbc->sync_mode == WB_SYNC_NONE) |
2917 | /* | 2859 | /* |
2918 | * We stop writing back only if we are | 2860 | * We stop writing back only if we are |
2919 | * not doing integrity sync. In case of | 2861 | * not doing integrity sync. In case of |
@@ -2924,14 +2866,18 @@ continue_unlock: | |||
2924 | * pages, but have not synced all of the | 2866 | * pages, but have not synced all of the |
2925 | * old dirty pages. | 2867 | * old dirty pages. |
2926 | */ | 2868 | */ |
2927 | done = 1; | 2869 | goto out; |
2928 | break; | ||
2929 | } | ||
2930 | } | 2870 | } |
2931 | } | 2871 | } |
2932 | pagevec_release(&pvec); | 2872 | pagevec_release(&pvec); |
2933 | cond_resched(); | 2873 | cond_resched(); |
2934 | } | 2874 | } |
2875 | return 0; | ||
2876 | ret_extent_tail: | ||
2877 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2878 | out: | ||
2879 | pagevec_release(&pvec); | ||
2880 | cond_resched(); | ||
2935 | return ret; | 2881 | return ret; |
2936 | } | 2882 | } |
2937 | 2883 | ||
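Note the nr_to_write check near the end of the loop above: the early stop applies only to WB_SYNC_NONE writeback, because an integrity sync must keep writing until every dirty page is on disk. A tiny sketch of that rule (names are illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Stop early only for background (non-integrity) writeback. */
static bool should_stop(long nr_to_write, bool integrity_sync)
{
	return nr_to_write <= 0 && !integrity_sync;
}

int main(void)
{
	printf("%d %d\n", should_stop(0, false), should_stop(0, true)); /* 1 0 */
	return 0;
}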
@@ -2945,7 +2891,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2945 | struct mpage_da_data mpd; | 2891 | struct mpage_da_data mpd; |
2946 | struct inode *inode = mapping->host; | 2892 | struct inode *inode = mapping->host; |
2947 | int pages_written = 0; | 2893 | int pages_written = 0; |
2948 | long pages_skipped; | ||
2949 | unsigned int max_pages; | 2894 | unsigned int max_pages; |
2950 | int range_cyclic, cycled = 1, io_done = 0; | 2895 | int range_cyclic, cycled = 1, io_done = 0; |
2951 | int needed_blocks, ret = 0; | 2896 | int needed_blocks, ret = 0; |
@@ -3028,11 +2973,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3028 | wbc->nr_to_write = desired_nr_to_write; | 2973 | wbc->nr_to_write = desired_nr_to_write; |
3029 | } | 2974 | } |
3030 | 2975 | ||
3031 | mpd.wbc = wbc; | ||
3032 | mpd.inode = mapping->host; | ||
3033 | |||
3034 | pages_skipped = wbc->pages_skipped; | ||
3035 | |||
3036 | retry: | 2976 | retry: |
3037 | if (wbc->sync_mode == WB_SYNC_ALL) | 2977 | if (wbc->sync_mode == WB_SYNC_ALL) |
3038 | tag_pages_for_writeback(mapping, index, end); | 2978 | tag_pages_for_writeback(mapping, index, end); |
@@ -3059,22 +2999,10 @@ retry: | |||
3059 | } | 2999 | } |
3060 | 3000 | ||
3061 | /* | 3001 | /* |
3062 | * Now call __mpage_da_writepage to find the next | 3002 | * Now call write_cache_pages_da() to find the next |
3063 | * contiguous region of logical blocks that need | 3003 | * contiguous region of logical blocks that need |
3064 | * blocks to be allocated by ext4. We don't actually | 3004 | * blocks to be allocated by ext4 and submit them. |
3065 | * submit the blocks for I/O here, even though | ||
3066 | * write_cache_pages thinks it will, and will set the | ||
3067 | * pages as clean for write before calling | ||
3068 | * __mpage_da_writepage(). | ||
3069 | */ | 3005 | */ |
3070 | mpd.b_size = 0; | ||
3071 | mpd.b_state = 0; | ||
3072 | mpd.b_blocknr = 0; | ||
3073 | mpd.first_page = 0; | ||
3074 | mpd.next_page = 0; | ||
3075 | mpd.io_done = 0; | ||
3076 | mpd.pages_written = 0; | ||
3077 | mpd.retval = 0; | ||
3078 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); | 3006 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); |
3079 | /* | 3007 | /* |
3080 | * If we have a contiguous extent of pages and we | 3008 | * If we have a contiguous extent of pages and we |
@@ -3096,7 +3024,6 @@ retry: | |||
3096 | * and try again | 3024 | * and try again |
3097 | */ | 3025 | */ |
3098 | jbd2_journal_force_commit_nested(sbi->s_journal); | 3026 | jbd2_journal_force_commit_nested(sbi->s_journal); |
3099 | wbc->pages_skipped = pages_skipped; | ||
3100 | ret = 0; | 3027 | ret = 0; |
3101 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 3028 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { |
3102 | /* | 3029 | /* |
@@ -3104,7 +3031,6 @@ retry: | |||
3104 | * rest of the pages | 3031 | * rest of the pages |
3105 | */ | 3032 | */ |
3106 | pages_written += mpd.pages_written; | 3033 | pages_written += mpd.pages_written; |
3107 | wbc->pages_skipped = pages_skipped; | ||
3108 | ret = 0; | 3034 | ret = 0; |
3109 | io_done = 1; | 3035 | io_done = 1; |
3110 | } else if (wbc->nr_to_write) | 3036 | } else if (wbc->nr_to_write) |
@@ -3122,11 +3048,6 @@ retry: | |||
3122 | wbc->range_end = mapping->writeback_index - 1; | 3048 | wbc->range_end = mapping->writeback_index - 1; |
3123 | goto retry; | 3049 | goto retry; |
3124 | } | 3050 | } |
3125 | if (pages_skipped != wbc->pages_skipped) | ||
3126 | ext4_msg(inode->i_sb, KERN_CRIT, | ||
3127 | "This should not happen leaving %s " | ||
3128 | "with nr_to_write = %ld ret = %d", | ||
3129 | __func__, wbc->nr_to_write, ret); | ||
3130 | 3051 | ||
3131 | /* Update index */ | 3052 | /* Update index */ |
3132 | wbc->range_cyclic = range_cyclic; | 3053 | wbc->range_cyclic = range_cyclic; |
@@ -3460,6 +3381,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3460 | 3381 | ||
3461 | static int ext4_readpage(struct file *file, struct page *page) | 3382 | static int ext4_readpage(struct file *file, struct page *page) |
3462 | { | 3383 | { |
3384 | trace_ext4_readpage(page); | ||
3463 | return mpage_readpage(page, ext4_get_block); | 3385 | return mpage_readpage(page, ext4_get_block); |
3464 | } | 3386 | } |
3465 | 3387 | ||
@@ -3494,6 +3416,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset) | |||
3494 | { | 3416 | { |
3495 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3417 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3496 | 3418 | ||
3419 | trace_ext4_invalidatepage(page, offset); | ||
3420 | |||
3497 | /* | 3421 | /* |
3498 | * free any io_end structure allocated for buffers to be discarded | 3422 | * free any io_end structure allocated for buffers to be discarded |
3499 | */ | 3423 | */ |
@@ -3515,6 +3439,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3515 | { | 3439 | { |
3516 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3440 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3517 | 3441 | ||
3442 | trace_ext4_releasepage(page); | ||
3443 | |||
3518 | WARN_ON(PageChecked(page)); | 3444 | WARN_ON(PageChecked(page)); |
3519 | if (!page_has_buffers(page)) | 3445 | if (!page_has_buffers(page)) |
3520 | return 0; | 3446 | return 0; |
@@ -3873,11 +3799,16 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
3873 | { | 3799 | { |
3874 | struct file *file = iocb->ki_filp; | 3800 | struct file *file = iocb->ki_filp; |
3875 | struct inode *inode = file->f_mapping->host; | 3801 | struct inode *inode = file->f_mapping->host; |
3802 | ssize_t ret; | ||
3876 | 3803 | ||
3804 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | ||
3877 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3805 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3878 | return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3806 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
3879 | 3807 | else | |
3880 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 3808 | ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); |
3809 | trace_ext4_direct_IO_exit(inode, offset, | ||
3810 | iov_length(iov, nr_segs), rw, ret); | ||
3811 | return ret; | ||
3881 | } | 3812 | } |
3882 | 3813 | ||
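The direct I/O hunk above converts two early returns into a single exit so the exit-side tracepoint fires on both the extent and indirect paths. A compilable sketch of the single-exit rewrite (the printf calls stand in for the trace events, and the return values are made up):

#include <stdio.h>

/* Compute ret in either branch so the exit instrumentation always runs. */
static long do_io(int extent_mapped)
{
	long ret;

	printf("direct_IO_enter\n");
	if (extent_mapped)
		ret = 100;	/* extent-mapped path */
	else
		ret = 200;	/* indirect path */
	printf("direct_IO_exit: ret=%ld\n", ret);
	return ret;
}

int main(void)
{
	return do_io(1) == 100 ? 0 : 1;
}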
3883 | /* | 3814 | /* |
@@ -4173,6 +4104,9 @@ no_top: | |||
4173 | * | 4104 | * |
4174 | * We release `count' blocks on disk, but (last - first) may be greater | 4105 | * We release `count' blocks on disk, but (last - first) may be greater |
4175 | * than `count' because there can be holes in there. | 4106 | * than `count' because there can be holes in there. |
4107 | * | ||
4108 | * Return 0 on success, 1 on invalid block range | ||
4109 | * and < 0 on fatal error. | ||
4176 | */ | 4110 | */ |
4177 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | 4111 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, |
4178 | struct buffer_head *bh, | 4112 | struct buffer_head *bh, |
@@ -4199,33 +4133,32 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4199 | if (bh) { | 4133 | if (bh) { |
4200 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4134 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
4201 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 4135 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
4202 | if (unlikely(err)) { | 4136 | if (unlikely(err)) |
4203 | ext4_std_error(inode->i_sb, err); | 4137 | goto out_err; |
4204 | return 1; | ||
4205 | } | ||
4206 | } | 4138 | } |
4207 | err = ext4_mark_inode_dirty(handle, inode); | 4139 | err = ext4_mark_inode_dirty(handle, inode); |
4208 | if (unlikely(err)) { | 4140 | if (unlikely(err)) |
4209 | ext4_std_error(inode->i_sb, err); | 4141 | goto out_err; |
4210 | return 1; | ||
4211 | } | ||
4212 | err = ext4_truncate_restart_trans(handle, inode, | 4142 | err = ext4_truncate_restart_trans(handle, inode, |
4213 | blocks_for_truncate(inode)); | 4143 | blocks_for_truncate(inode)); |
4214 | if (unlikely(err)) { | 4144 | if (unlikely(err)) |
4215 | ext4_std_error(inode->i_sb, err); | 4145 | goto out_err; |
4216 | return 1; | ||
4217 | } | ||
4218 | if (bh) { | 4146 | if (bh) { |
4219 | BUFFER_TRACE(bh, "retaking write access"); | 4147 | BUFFER_TRACE(bh, "retaking write access"); |
4220 | ext4_journal_get_write_access(handle, bh); | 4148 | err = ext4_journal_get_write_access(handle, bh); |
4149 | if (unlikely(err)) | ||
4150 | goto out_err; | ||
4221 | } | 4151 | } |
4222 | } | 4152 | } |
4223 | 4153 | ||
4224 | for (p = first; p < last; p++) | 4154 | for (p = first; p < last; p++) |
4225 | *p = 0; | 4155 | *p = 0; |
4226 | 4156 | ||
4227 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); | 4157 | ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); |
4228 | return 0; | 4158 | return 0; |
4159 | out_err: | ||
4160 | ext4_std_error(inode->i_sb, err); | ||
4161 | return err; | ||
4229 | } | 4162 | } |
4230 | 4163 | ||
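The ext4_clear_blocks() hunk above replaces three copies of "report the error and return 1" with one out_err label, and starts checking the ext4_journal_get_write_access() return as well. A compilable sketch of that consolidation (step() is a placeholder for the journalling calls):

#include <stdio.h>

static int step(int n)
{
	return n == 2 ? -5 : 0;	/* pretend the second call fails */
}

/* Each step jumps to a single label that reports the error once. */
static int clear_blocks(void)
{
	int err;

	err = step(1);
	if (err)
		goto out_err;
	err = step(2);
	if (err)
		goto out_err;
	return 0;
out_err:
	fprintf(stderr, "clear_blocks failed: %d\n", err);
	return err;
}

int main(void)
{
	return clear_blocks() ? 1 : 0;
}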
4231 | /** | 4164 | /** |
@@ -4259,7 +4192,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4259 | ext4_fsblk_t nr; /* Current block # */ | 4192 | ext4_fsblk_t nr; /* Current block # */ |
4260 | __le32 *p; /* Pointer into inode/ind | 4193 | __le32 *p; /* Pointer into inode/ind |
4261 | for current block */ | 4194 | for current block */ |
4262 | int err; | 4195 | int err = 0; |
4263 | 4196 | ||
4264 | if (this_bh) { /* For indirect block */ | 4197 | if (this_bh) { /* For indirect block */ |
4265 | BUFFER_TRACE(this_bh, "get_write_access"); | 4198 | BUFFER_TRACE(this_bh, "get_write_access"); |
@@ -4281,9 +4214,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4281 | } else if (nr == block_to_free + count) { | 4214 | } else if (nr == block_to_free + count) { |
4282 | count++; | 4215 | count++; |
4283 | } else { | 4216 | } else { |
4284 | if (ext4_clear_blocks(handle, inode, this_bh, | 4217 | err = ext4_clear_blocks(handle, inode, this_bh, |
4285 | block_to_free, count, | 4218 | block_to_free, count, |
4286 | block_to_free_p, p)) | 4219 | block_to_free_p, p); |
4220 | if (err) | ||
4287 | break; | 4221 | break; |
4288 | block_to_free = nr; | 4222 | block_to_free = nr; |
4289 | block_to_free_p = p; | 4223 | block_to_free_p = p; |
@@ -4292,9 +4226,12 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4292 | } | 4226 | } |
4293 | } | 4227 | } |
4294 | 4228 | ||
4295 | if (count > 0) | 4229 | if (!err && count > 0) |
4296 | ext4_clear_blocks(handle, inode, this_bh, block_to_free, | 4230 | err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, |
4297 | count, block_to_free_p, p); | 4231 | count, block_to_free_p, p); |
4232 | if (err < 0) | ||
4233 | /* fatal error */ | ||
4234 | return; | ||
4298 | 4235 | ||
4299 | if (this_bh) { | 4236 | if (this_bh) { |
4300 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); | 4237 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); |
@@ -4412,7 +4349,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4412 | * transaction where the data blocks are | 4349 | * transaction where the data blocks are |
4413 | * actually freed. | 4350 | * actually freed. |
4414 | */ | 4351 | */ |
4415 | ext4_free_blocks(handle, inode, 0, nr, 1, | 4352 | ext4_free_blocks(handle, inode, NULL, nr, 1, |
4416 | EXT4_FREE_BLOCKS_METADATA| | 4353 | EXT4_FREE_BLOCKS_METADATA| |
4417 | EXT4_FREE_BLOCKS_FORGET); | 4354 | EXT4_FREE_BLOCKS_FORGET); |
4418 | 4355 | ||
@@ -4496,6 +4433,8 @@ void ext4_truncate(struct inode *inode) | |||
4496 | ext4_lblk_t last_block; | 4433 | ext4_lblk_t last_block; |
4497 | unsigned blocksize = inode->i_sb->s_blocksize; | 4434 | unsigned blocksize = inode->i_sb->s_blocksize; |
4498 | 4435 | ||
4436 | trace_ext4_truncate_enter(inode); | ||
4437 | |||
4499 | if (!ext4_can_truncate(inode)) | 4438 | if (!ext4_can_truncate(inode)) |
4500 | return; | 4439 | return; |
4501 | 4440 | ||
@@ -4506,6 +4445,7 @@ void ext4_truncate(struct inode *inode) | |||
4506 | 4445 | ||
4507 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 4446 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
4508 | ext4_ext_truncate(inode); | 4447 | ext4_ext_truncate(inode); |
4448 | trace_ext4_truncate_exit(inode); | ||
4509 | return; | 4449 | return; |
4510 | } | 4450 | } |
4511 | 4451 | ||
@@ -4635,6 +4575,7 @@ out_stop: | |||
4635 | ext4_orphan_del(handle, inode); | 4575 | ext4_orphan_del(handle, inode); |
4636 | 4576 | ||
4637 | ext4_journal_stop(handle); | 4577 | ext4_journal_stop(handle); |
4578 | trace_ext4_truncate_exit(inode); | ||
4638 | } | 4579 | } |
4639 | 4580 | ||
4640 | /* | 4581 | /* |
@@ -4766,6 +4707,7 @@ make_io: | |||
4766 | * has in-inode xattrs, or we don't have this inode in memory. | 4707 | * has in-inode xattrs, or we don't have this inode in memory. |
4767 | * Read the block from disk. | 4708 | * Read the block from disk. |
4768 | */ | 4709 | */ |
4710 | trace_ext4_load_inode(inode); | ||
4769 | get_bh(bh); | 4711 | get_bh(bh); |
4770 | bh->b_end_io = end_buffer_read_sync; | 4712 | bh->b_end_io = end_buffer_read_sync; |
4771 | submit_bh(READ_META, bh); | 4713 | submit_bh(READ_META, bh); |
@@ -4871,7 +4813,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4871 | return inode; | 4813 | return inode; |
4872 | 4814 | ||
4873 | ei = EXT4_I(inode); | 4815 | ei = EXT4_I(inode); |
4874 | iloc.bh = 0; | 4816 | iloc.bh = NULL; |
4875 | 4817 | ||
4876 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 4818 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
4877 | if (ret < 0) | 4819 | if (ret < 0) |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a84faa110bcd..808c554e773f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -334,16 +334,22 @@ mext_out: | |||
334 | case FITRIM: | 334 | case FITRIM: |
335 | { | 335 | { |
336 | struct super_block *sb = inode->i_sb; | 336 | struct super_block *sb = inode->i_sb; |
337 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | ||
337 | struct fstrim_range range; | 338 | struct fstrim_range range; |
338 | int ret = 0; | 339 | int ret = 0; |
339 | 340 | ||
340 | if (!capable(CAP_SYS_ADMIN)) | 341 | if (!capable(CAP_SYS_ADMIN)) |
341 | return -EPERM; | 342 | return -EPERM; |
342 | 343 | ||
344 | if (!blk_queue_discard(q)) | ||
345 | return -EOPNOTSUPP; | ||
346 | |||
343 | if (copy_from_user(&range, (struct fstrim_range *)arg, | 347 | if (copy_from_user(&range, (struct fstrim_range *)arg, |
344 | sizeof(range))) | 348 | sizeof(range))) |
345 | return -EFAULT; | 349 | return -EFAULT; |
346 | 350 | ||
351 | range.minlen = max((unsigned int)range.minlen, | ||
352 | q->limits.discard_granularity); | ||
347 | ret = ext4_trim_fs(sb, &range); | 353 | ret = ext4_trim_fs(sb, &range); |
348 | if (ret < 0) | 354 | if (ret < 0) |
349 | return ret; | 355 | return ret; |
@@ -421,6 +427,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
421 | return err; | 427 | return err; |
422 | } | 428 | } |
423 | case EXT4_IOC_MOVE_EXT: | 429 | case EXT4_IOC_MOVE_EXT: |
430 | case FITRIM: | ||
424 | break; | 431 | break; |
425 | default: | 432 | default: |
426 | return -ENOIOCTLCMD; | 433 | return -ENOIOCTLCMD; |
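With the FITRIM hunks above, the ioctl now rejects devices whose queue cannot discard and clamps minlen to the queue's discard granularity before calling ext4_trim_fs(). A userspace caller exercises it roughly like this (the /mnt path is an assumption; the call needs CAP_SYS_ADMIN and a filesystem on a discard-capable device):

#include <fcntl.h>
#include <linux/fs.h>	/* FITRIM, struct fstrim_range */
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	struct fstrim_range range = {
		.start = 0,
		.len = ~0ULL,	/* whole filesystem */
		.minlen = 0,	/* kernel raises this to the discard granularity */
	};
	int fd = open("/mnt", O_RDONLY);

	if (fd < 0 || ioctl(fd, FITRIM, &range) < 0) {
		perror("FITRIM");	/* EOPNOTSUPP if the device can't discard */
		return 1;
	}
	printf("trimmed %llu bytes\n", (unsigned long long)range.len);
	close(fd);
	return 0;
}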
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d1fe09aea73d..a5837a837a8b 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -432,9 +432,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) | |||
432 | } | 432 | } |
433 | 433 | ||
434 | /* at order 0 we see each particular block */ | 434 | /* at order 0 we see each particular block */ |
435 | *max = 1 << (e4b->bd_blkbits + 3); | 435 | if (order == 0) { |
436 | if (order == 0) | 436 | *max = 1 << (e4b->bd_blkbits + 3); |
437 | return EXT4_MB_BITMAP(e4b); | 437 | return EXT4_MB_BITMAP(e4b); |
438 | } | ||
438 | 439 | ||
439 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; | 440 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; |
440 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; | 441 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; |
@@ -616,7 +617,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
616 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); | 617 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); |
617 | 618 | ||
618 | grp = ext4_get_group_info(sb, e4b->bd_group); | 619 | grp = ext4_get_group_info(sb, e4b->bd_group); |
619 | buddy = mb_find_buddy(e4b, 0, &max); | ||
620 | list_for_each(cur, &grp->bb_prealloc_list) { | 620 | list_for_each(cur, &grp->bb_prealloc_list) { |
621 | ext4_group_t groupnr; | 621 | ext4_group_t groupnr; |
622 | struct ext4_prealloc_space *pa; | 622 | struct ext4_prealloc_space *pa; |
@@ -635,7 +635,12 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
635 | #define mb_check_buddy(e4b) | 635 | #define mb_check_buddy(e4b) |
636 | #endif | 636 | #endif |
637 | 637 | ||
638 | /* FIXME!! need more doc */ | 638 | /* |
639 | * Divide the blocks starting at @first, of length @len, into | ||
640 | * smaller chunks with power-of-2 block counts. | ||
641 | * Clear the bits in the bitmap that the chunks' blocks cover, | ||
642 | * then increase bb_counters[] for the corresponding chunk size. | ||
643 | */ | ||
639 | static void ext4_mb_mark_free_simple(struct super_block *sb, | 644 | static void ext4_mb_mark_free_simple(struct super_block *sb, |
640 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, | 645 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, |
641 | struct ext4_group_info *grp) | 646 | struct ext4_group_info *grp) |
@@ -2381,7 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2381 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | 2386 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte |
2382 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | 2387 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. |
2383 | * So a two level scheme suffices for now. */ | 2388 | * So a two level scheme suffices for now. */ |
2384 | sbi->s_group_info = kmalloc(array_size, GFP_KERNEL); | 2389 | sbi->s_group_info = kzalloc(array_size, GFP_KERNEL); |
2385 | if (sbi->s_group_info == NULL) { | 2390 | if (sbi->s_group_info == NULL) { |
2386 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); | 2391 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); |
2387 | return -ENOMEM; | 2392 | return -ENOMEM; |
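The kmalloc-to-kzalloc switch above guarantees that unfilled s_group_info slots read back as NULL rather than indeterminate heap contents. The userspace analog is malloc() versus calloc():

#include <stdlib.h>

int main(void)
{
	size_t n = 128;
	void **table = calloc(n, sizeof(*table));	/* like kzalloc() */

	if (!table)
		return 1;
	/* table[i] is guaranteed NULL here; with malloc() it would be
	 * indeterminate until explicitly initialized. */
	free(table);
	return 0;
}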
@@ -3208,7 +3213,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | |||
3208 | cur_distance = abs(goal_block - cpa->pa_pstart); | 3213 | cur_distance = abs(goal_block - cpa->pa_pstart); |
3209 | new_distance = abs(goal_block - pa->pa_pstart); | 3214 | new_distance = abs(goal_block - pa->pa_pstart); |
3210 | 3215 | ||
3211 | if (cur_distance < new_distance) | 3216 | if (cur_distance <= new_distance) |
3212 | return cpa; | 3217 | return cpa; |
3213 | 3218 | ||
3214 | /* drop the previous reference */ | 3219 | /* drop the previous reference */ |
@@ -3907,7 +3912,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
3907 | struct super_block *sb = ac->ac_sb; | 3912 | struct super_block *sb = ac->ac_sb; |
3908 | ext4_group_t ngroups, i; | 3913 | ext4_group_t ngroups, i; |
3909 | 3914 | ||
3910 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | 3915 | if (!mb_enable_debug || |
3916 | (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) | ||
3911 | return; | 3917 | return; |
3912 | 3918 | ||
3913 | printk(KERN_ERR "EXT4-fs: Can't allocate:" | 3919 | printk(KERN_ERR "EXT4-fs: Can't allocate:" |
@@ -4753,7 +4759,8 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, | |||
4753 | * bitmap. Then issue a TRIM command on this extent and free the extent in | 4759 | * bitmap. Then issue a TRIM command on this extent and free the extent in |
4754 | * the group buddy bitmap. This is done until whole group is scanned. | 4760 | * the group buddy bitmap. This is done until whole group is scanned. |
4755 | */ | 4761 | */ |
4756 | ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | 4762 | static ext4_grpblk_t |
4763 | ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | ||
4757 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) | 4764 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) |
4758 | { | 4765 | { |
4759 | void *bitmap; | 4766 | void *bitmap; |
@@ -4863,10 +4870,15 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4863 | break; | 4870 | break; |
4864 | } | 4871 | } |
4865 | 4872 | ||
4866 | if (len >= EXT4_BLOCKS_PER_GROUP(sb)) | 4873 | /* |
4867 | len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block); | 4874 | * For all the groups except the last one, last block will |
4868 | else | 4875 | * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to |
4876 | * change it for the last group in which case start + | ||
4877 | * len < EXT4_BLOCKS_PER_GROUP(sb). | ||
4878 | */ | ||
4879 | if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb)) | ||
4869 | last_block = first_block + len; | 4880 | last_block = first_block + len; |
4881 | len -= last_block - first_block; | ||
4870 | 4882 | ||
4871 | if (e4b.bd_info->bb_free >= minlen) { | 4883 | if (e4b.bd_info->bb_free >= minlen) { |
4872 | cnt = ext4_trim_all_free(sb, &e4b, first_block, | 4884 | cnt = ext4_trim_all_free(sb, &e4b, first_block, |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index b619322c76f0..22bd4d7f289b 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -169,7 +169,7 @@ struct ext4_allocation_context { | |||
169 | /* original request */ | 169 | /* original request */ |
170 | struct ext4_free_extent ac_o_ex; | 170 | struct ext4_free_extent ac_o_ex; |
171 | 171 | ||
172 | /* goal request (after normalization) */ | 172 | /* goal request (normalized ac_o_ex) */ |
173 | struct ext4_free_extent ac_g_ex; | 173 | struct ext4_free_extent ac_g_ex; |
174 | 174 | ||
175 | /* the best found extent */ | 175 | /* the best found extent */ |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b0a126f23c20..d1bafa57f483 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -263,7 +263,7 @@ static int free_dind_blocks(handle_t *handle, | |||
263 | for (i = 0; i < max_entries; i++) { | 263 | for (i = 0; i < max_entries; i++) { |
264 | if (tmp_idata[i]) { | 264 | if (tmp_idata[i]) { |
265 | extend_credit_for_blkdel(handle, inode); | 265 | extend_credit_for_blkdel(handle, inode); |
266 | ext4_free_blocks(handle, inode, 0, | 266 | ext4_free_blocks(handle, inode, NULL, |
267 | le32_to_cpu(tmp_idata[i]), 1, | 267 | le32_to_cpu(tmp_idata[i]), 1, |
268 | EXT4_FREE_BLOCKS_METADATA | | 268 | EXT4_FREE_BLOCKS_METADATA | |
269 | EXT4_FREE_BLOCKS_FORGET); | 269 | EXT4_FREE_BLOCKS_FORGET); |
@@ -271,7 +271,7 @@ static int free_dind_blocks(handle_t *handle, | |||
271 | } | 271 | } |
272 | put_bh(bh); | 272 | put_bh(bh); |
273 | extend_credit_for_blkdel(handle, inode); | 273 | extend_credit_for_blkdel(handle, inode); |
274 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, | 274 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, |
275 | EXT4_FREE_BLOCKS_METADATA | | 275 | EXT4_FREE_BLOCKS_METADATA | |
276 | EXT4_FREE_BLOCKS_FORGET); | 276 | EXT4_FREE_BLOCKS_FORGET); |
277 | return 0; | 277 | return 0; |
@@ -302,7 +302,7 @@ static int free_tind_blocks(handle_t *handle, | |||
302 | } | 302 | } |
303 | put_bh(bh); | 303 | put_bh(bh); |
304 | extend_credit_for_blkdel(handle, inode); | 304 | extend_credit_for_blkdel(handle, inode); |
305 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, | 305 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, |
306 | EXT4_FREE_BLOCKS_METADATA | | 306 | EXT4_FREE_BLOCKS_METADATA | |
307 | EXT4_FREE_BLOCKS_FORGET); | 307 | EXT4_FREE_BLOCKS_FORGET); |
308 | return 0; | 308 | return 0; |
@@ -315,7 +315,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) | |||
315 | /* ei->i_data[EXT4_IND_BLOCK] */ | 315 | /* ei->i_data[EXT4_IND_BLOCK] */ |
316 | if (i_data[0]) { | 316 | if (i_data[0]) { |
317 | extend_credit_for_blkdel(handle, inode); | 317 | extend_credit_for_blkdel(handle, inode); |
318 | ext4_free_blocks(handle, inode, 0, | 318 | ext4_free_blocks(handle, inode, NULL, |
319 | le32_to_cpu(i_data[0]), 1, | 319 | le32_to_cpu(i_data[0]), 1, |
320 | EXT4_FREE_BLOCKS_METADATA | | 320 | EXT4_FREE_BLOCKS_METADATA | |
321 | EXT4_FREE_BLOCKS_FORGET); | 321 | EXT4_FREE_BLOCKS_FORGET); |
@@ -428,7 +428,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | |||
428 | } | 428 | } |
429 | put_bh(bh); | 429 | put_bh(bh); |
430 | extend_credit_for_blkdel(handle, inode); | 430 | extend_credit_for_blkdel(handle, inode); |
431 | ext4_free_blocks(handle, inode, 0, block, 1, | 431 | ext4_free_blocks(handle, inode, NULL, block, 1, |
432 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | 432 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); |
433 | return retval; | 433 | return retval; |
434 | } | 434 | } |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index e781b7ea5630..67fd0b025858 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "xattr.h" | 40 | #include "xattr.h" |
41 | #include "acl.h" | 41 | #include "acl.h" |
42 | 42 | ||
43 | #include <trace/events/ext4.h> | ||
43 | /* | 44 | /* |
44 | * define how far ahead to read directories while searching them. | 45 | * define how far ahead to read directories while searching them. |
45 | */ | 46 | */ |
@@ -2183,6 +2184,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2183 | struct ext4_dir_entry_2 *de; | 2184 | struct ext4_dir_entry_2 *de; |
2184 | handle_t *handle; | 2185 | handle_t *handle; |
2185 | 2186 | ||
2187 | trace_ext4_unlink_enter(dir, dentry); | ||
2186 | /* Initialize quotas before so that eventual writes go | 2188 | /* Initialize quotas before so that eventual writes go |
2187 | * in separate transaction */ | 2189 | * in separate transaction */ |
2188 | dquot_initialize(dir); | 2190 | dquot_initialize(dir); |
@@ -2228,6 +2230,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2228 | end_unlink: | 2230 | end_unlink: |
2229 | ext4_journal_stop(handle); | 2231 | ext4_journal_stop(handle); |
2230 | brelse(bh); | 2232 | brelse(bh); |
2233 | trace_ext4_unlink_exit(dentry, retval); | ||
2231 | return retval; | 2234 | return retval; |
2232 | } | 2235 | } |
2233 | 2236 | ||
@@ -2402,6 +2405,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2402 | if (!new_inode && new_dir != old_dir && | 2405 | if (!new_inode && new_dir != old_dir && |
2403 | EXT4_DIR_LINK_MAX(new_dir)) | 2406 | EXT4_DIR_LINK_MAX(new_dir)) |
2404 | goto end_rename; | 2407 | goto end_rename; |
2408 | BUFFER_TRACE(dir_bh, "get_write_access"); | ||
2409 | retval = ext4_journal_get_write_access(handle, dir_bh); | ||
2410 | if (retval) | ||
2411 | goto end_rename; | ||
2405 | } | 2412 | } |
2406 | if (!new_bh) { | 2413 | if (!new_bh) { |
2407 | retval = ext4_add_entry(handle, new_dentry, old_inode); | 2414 | retval = ext4_add_entry(handle, new_dentry, old_inode); |
@@ -2409,7 +2416,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2409 | goto end_rename; | 2416 | goto end_rename; |
2410 | } else { | 2417 | } else { |
2411 | BUFFER_TRACE(new_bh, "get write access"); | 2418 | BUFFER_TRACE(new_bh, "get write access"); |
2412 | ext4_journal_get_write_access(handle, new_bh); | 2419 | retval = ext4_journal_get_write_access(handle, new_bh); |
2420 | if (retval) | ||
2421 | goto end_rename; | ||
2413 | new_de->inode = cpu_to_le32(old_inode->i_ino); | 2422 | new_de->inode = cpu_to_le32(old_inode->i_ino); |
2414 | if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, | 2423 | if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, |
2415 | EXT4_FEATURE_INCOMPAT_FILETYPE)) | 2424 | EXT4_FEATURE_INCOMPAT_FILETYPE)) |
@@ -2470,8 +2479,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2470 | old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); | 2479 | old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); |
2471 | ext4_update_dx_flag(old_dir); | 2480 | ext4_update_dx_flag(old_dir); |
2472 | if (dir_bh) { | 2481 | if (dir_bh) { |
2473 | BUFFER_TRACE(dir_bh, "get_write_access"); | ||
2474 | ext4_journal_get_write_access(handle, dir_bh); | ||
2475 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = | 2482 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = |
2476 | cpu_to_le32(new_dir->i_ino); | 2483 | cpu_to_le32(new_dir->i_ino); |
2477 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); | 2484 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); |
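The ext4_rename() hunks above enforce jbd2's ordering rule: take journal write access on a buffer, and check its result, before any directory block is modified; the old code called it unchecked immediately before the PARENT_INO update. A sketch of that discipline (get_write_access() is a toy stand-in for ext4_journal_get_write_access()):

#include <stdio.h>

static int get_write_access(const char *what)
{
	printf("journal: declaring write to %s\n", what);
	return 0;	/* pretend success */
}

/* Declare intent to modify first; only touch the buffer on success. */
static int update_parent_entry(void)
{
	int err = get_write_access("dir block");

	if (err)
		return err;	/* nothing has been modified yet */
	/* ... now safe to rewrite the ".." entry ... */
	return 0;
}

int main(void)
{
	return update_parent_entry();
}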
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index e2cd90e4bb7c..b6dbd056fcb1 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -259,6 +259,11 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
259 | bi_sector >> (inode->i_blkbits - 9)); | 259 | bi_sector >> (inode->i_blkbits - 9)); |
260 | } | 260 | } |
261 | 261 | ||
262 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
263 | ext4_free_io_end(io_end); | ||
264 | return; | ||
265 | } | ||
266 | |||
262 | /* Add the io_end to per-inode completed io list*/ | 267 | /* Add the io_end to per-inode completed io list*/ |
263 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 268 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
264 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | 269 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); |
@@ -279,9 +284,9 @@ void ext4_io_submit(struct ext4_io_submit *io) | |||
279 | BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); | 284 | BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); |
280 | bio_put(io->io_bio); | 285 | bio_put(io->io_bio); |
281 | } | 286 | } |
282 | io->io_bio = 0; | 287 | io->io_bio = NULL; |
283 | io->io_op = 0; | 288 | io->io_op = 0; |
284 | io->io_end = 0; | 289 | io->io_end = NULL; |
285 | } | 290 | } |
286 | 291 | ||
287 | static int io_submit_init(struct ext4_io_submit *io, | 292 | static int io_submit_init(struct ext4_io_submit *io, |
@@ -380,8 +385,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
380 | 385 | ||
381 | BUG_ON(!PageLocked(page)); | 386 | BUG_ON(!PageLocked(page)); |
382 | BUG_ON(PageWriteback(page)); | 387 | BUG_ON(PageWriteback(page)); |
383 | set_page_writeback(page); | ||
384 | ClearPageError(page); | ||
385 | 388 | ||
386 | io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); | 389 | io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); |
387 | if (!io_page) { | 390 | if (!io_page) { |
@@ -392,6 +395,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
392 | io_page->p_page = page; | 395 | io_page->p_page = page; |
393 | atomic_set(&io_page->p_count, 1); | 396 | atomic_set(&io_page->p_count, 1); |
394 | get_page(page); | 397 | get_page(page); |
398 | set_page_writeback(page); | ||
399 | ClearPageError(page); | ||
395 | 400 | ||
396 | for (bh = head = page_buffers(page), block_start = 0; | 401 | for (bh = head = page_buffers(page), block_start = 0; |
397 | bh != head || !block_start; | 402 | bh != head || !block_start; |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3ecc6e45d2f9..80bbc9c60c24 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -230,7 +230,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
230 | } | 230 | } |
231 | 231 | ||
232 | /* Zero out all of the reserved backup group descriptor table blocks */ | 232 | /* Zero out all of the reserved backup group descriptor table blocks */ |
233 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", | 233 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", |
234 | block, sbi->s_itb_per_group); | 234 | block, sbi->s_itb_per_group); |
235 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, | 235 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, |
236 | GFP_NOFS); | 236 | GFP_NOFS); |
@@ -248,7 +248,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
248 | 248 | ||
249 | /* Zero out all of the inode table blocks */ | 249 | /* Zero out all of the inode table blocks */ |
250 | block = input->inode_table; | 250 | block = input->inode_table; |
251 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", | 251 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", |
252 | block, sbi->s_itb_per_group); | 252 | block, sbi->s_itb_per_group); |
253 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); | 253 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); |
254 | if (err) | 254 | if (err) |
@@ -499,12 +499,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
499 | return err; | 499 | return err; |
500 | 500 | ||
501 | exit_inode: | 501 | exit_inode: |
502 | /* ext4_journal_release_buffer(handle, iloc.bh); */ | 502 | /* ext4_handle_release_buffer(handle, iloc.bh); */ |
503 | brelse(iloc.bh); | 503 | brelse(iloc.bh); |
504 | exit_dindj: | 504 | exit_dindj: |
505 | /* ext4_journal_release_buffer(handle, dind); */ | 505 | /* ext4_handle_release_buffer(handle, dind); */ |
506 | exit_sbh: | 506 | exit_sbh: |
507 | /* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */ | 507 | /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */ |
508 | exit_dind: | 508 | exit_dind: |
509 | brelse(dind); | 509 | brelse(dind); |
510 | exit_bh: | 510 | exit_bh: |
@@ -586,7 +586,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, | |||
586 | /* | 586 | /* |
587 | int j; | 587 | int j; |
588 | for (j = 0; j < i; j++) | 588 | for (j = 0; j < i; j++) |
589 | ext4_journal_release_buffer(handle, primary[j]); | 589 | ext4_handle_release_buffer(handle, primary[j]); |
590 | */ | 590 | */ |
591 | goto exit_bh; | 591 | goto exit_bh; |
592 | } | 592 | } |
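The resize.c hunks above fix a printk format mismatch: sbi->s_itb_per_group is an unsigned long, so %#04llx would read too many bytes on 32-bit builds; %#04lx matches the argument. A userspace illustration:

#include <stdio.h>

int main(void)
{
	unsigned long itb_per_group = 512;	/* same type as sbi->s_itb_per_group */

	/* %#04lx matches unsigned long; %#04llx expects a 64-bit value and
	 * would misread a 32-bit unsigned long argument. */
	printf("clear inode table blocks -> %#04lx\n", itb_per_group);
	return 0;
}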
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 203f9e4a70be..22546ad7f0ae 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -54,9 +54,9 @@ | |||
54 | 54 | ||
55 | static struct proc_dir_entry *ext4_proc_root; | 55 | static struct proc_dir_entry *ext4_proc_root; |
56 | static struct kset *ext4_kset; | 56 | static struct kset *ext4_kset; |
57 | struct ext4_lazy_init *ext4_li_info; | 57 | static struct ext4_lazy_init *ext4_li_info; |
58 | struct mutex ext4_li_mtx; | 58 | static struct mutex ext4_li_mtx; |
59 | struct ext4_features *ext4_feat; | 59 | static struct ext4_features *ext4_feat; |
60 | 60 | ||
61 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 61 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
62 | unsigned long journal_devnum); | 62 | unsigned long journal_devnum); |
@@ -75,6 +75,7 @@ static void ext4_write_super(struct super_block *sb); | |||
75 | static int ext4_freeze(struct super_block *sb); | 75 | static int ext4_freeze(struct super_block *sb); |
76 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, | 76 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, |
77 | const char *dev_name, void *data); | 77 | const char *dev_name, void *data); |
78 | static int ext4_feature_set_ok(struct super_block *sb, int readonly); | ||
78 | static void ext4_destroy_lazyinit_thread(void); | 79 | static void ext4_destroy_lazyinit_thread(void); |
79 | static void ext4_unregister_li_request(struct super_block *sb); | 80 | static void ext4_unregister_li_request(struct super_block *sb); |
80 | static void ext4_clear_request_list(void); | 81 | static void ext4_clear_request_list(void); |
@@ -594,7 +595,7 @@ __acquires(bitlock) | |||
594 | 595 | ||
595 | vaf.fmt = fmt; | 596 | vaf.fmt = fmt; |
596 | vaf.va = &args; | 597 | vaf.va = &args; |
597 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u", | 598 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", |
598 | sb->s_id, function, line, grp); | 599 | sb->s_id, function, line, grp); |
599 | if (ino) | 600 | if (ino) |
600 | printk(KERN_CONT "inode %lu: ", ino); | 601 | printk(KERN_CONT "inode %lu: ", ino); |
@@ -997,13 +998,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
997 | if (test_opt(sb, OLDALLOC)) | 998 | if (test_opt(sb, OLDALLOC)) |
998 | seq_puts(seq, ",oldalloc"); | 999 | seq_puts(seq, ",oldalloc"); |
999 | #ifdef CONFIG_EXT4_FS_XATTR | 1000 | #ifdef CONFIG_EXT4_FS_XATTR |
1000 | if (test_opt(sb, XATTR_USER) && | 1001 | if (test_opt(sb, XATTR_USER)) |
1001 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) | ||
1002 | seq_puts(seq, ",user_xattr"); | 1002 | seq_puts(seq, ",user_xattr"); |
1003 | if (!test_opt(sb, XATTR_USER) && | 1003 | if (!test_opt(sb, XATTR_USER)) |
1004 | (def_mount_opts & EXT4_DEFM_XATTR_USER)) { | ||
1005 | seq_puts(seq, ",nouser_xattr"); | 1004 | seq_puts(seq, ",nouser_xattr"); |
1006 | } | ||
1007 | #endif | 1005 | #endif |
1008 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 1006 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1009 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) | 1007 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) |
@@ -1041,8 +1039,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1041 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) | 1039 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) |
1042 | seq_puts(seq, ",nodelalloc"); | 1040 | seq_puts(seq, ",nodelalloc"); |
1043 | 1041 | ||
1044 | if (test_opt(sb, MBLK_IO_SUBMIT)) | 1042 | if (!test_opt(sb, MBLK_IO_SUBMIT)) |
1045 | seq_puts(seq, ",mblk_io_submit"); | 1043 | seq_puts(seq, ",nomblk_io_submit"); |
1046 | if (sbi->s_stripe) | 1044 | if (sbi->s_stripe) |
1047 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | 1045 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); |
1048 | /* | 1046 | /* |
@@ -1451,7 +1449,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1451 | * Initialize args struct so we know whether arg was | 1449 | * Initialize args struct so we know whether arg was |
1452 | * found; some options take optional arguments. | 1450 | * found; some options take optional arguments. |
1453 | */ | 1451 | */ |
1454 | args[0].to = args[0].from = 0; | 1452 | args[0].to = args[0].from = NULL; |
1455 | token = match_token(p, tokens, args); | 1453 | token = match_token(p, tokens, args); |
1456 | switch (token) { | 1454 | switch (token) { |
1457 | case Opt_bsd_df: | 1455 | case Opt_bsd_df: |
@@ -1771,7 +1769,7 @@ set_qf_format: | |||
1771 | return 0; | 1769 | return 0; |
1772 | if (option < 0 || option > (1 << 30)) | 1770 | if (option < 0 || option > (1 << 30)) |
1773 | return 0; | 1771 | return 0; |
1774 | if (!is_power_of_2(option)) { | 1772 | if (option && !is_power_of_2(option)) { |
1775 | ext4_msg(sb, KERN_ERR, | 1773 | ext4_msg(sb, KERN_ERR, |
1776 | "EXT4-fs: inode_readahead_blks" | 1774 | "EXT4-fs: inode_readahead_blks" |
1777 | " must be a power of 2"); | 1775 | " must be a power of 2"); |
@@ -2120,6 +2118,13 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2120 | return; | 2118 | return; |
2121 | } | 2119 | } |
2122 | 2120 | ||
2121 | /* Check if feature set would not allow a r/w mount */ | ||
2122 | if (!ext4_feature_set_ok(sb, 0)) { | ||
2123 | ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " | ||
2124 | "unknown ROCOMPAT features"); | ||
2125 | return; | ||
2126 | } | ||
2127 | |||
2123 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { | 2128 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { |
2124 | if (es->s_last_orphan) | 2129 | if (es->s_last_orphan) |
2125 | jbd_debug(1, "Errors on filesystem, " | 2130 | jbd_debug(1, "Errors on filesystem, " |
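Orphan cleanup replays and truncates inodes, i.e. it writes to the filesystem, so the new guard above skips it when unknown RO_COMPAT features would forbid a read-write mount; ext4_feature_set_ok(sb, 0) performs that check. A toy model of the guard; feature_set_ok() and the SUPPORTED_RO_COMPAT mask are assumptions standing in for the real feature bitmaps:

    #include <stdio.h>

    #define SUPPORTED_RO_COMPAT 0x0007   /* assumed set of known bits */

    static int feature_set_ok(unsigned int ro_compat, int readonly)
    {
        if (readonly)
            return 1;       /* unknown RO_COMPAT bits are fine read-only */
        return (ro_compat & ~SUPPORTED_RO_COMPAT) == 0;
    }

    static void orphan_cleanup(unsigned int ro_compat)
    {
        if (!feature_set_ok(ro_compat, 0)) {
            puts("Skipping orphan cleanup due to unknown ROCOMPAT features");
            return;
        }
        puts("processing orphan list");
    }

    int main(void)
    {
        orphan_cleanup(0x0008);   /* unknown bit set: skip */
        orphan_cleanup(0x0001);   /* known features only: proceed */
        return 0;
    }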
@@ -2412,7 +2417,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
2412 | if (parse_strtoul(buf, 0x40000000, &t)) | 2417 | if (parse_strtoul(buf, 0x40000000, &t)) |
2413 | return -EINVAL; | 2418 | return -EINVAL; |
2414 | 2419 | ||
2415 | if (!is_power_of_2(t)) | 2420 | if (t && !is_power_of_2(t)) |
2416 | return -EINVAL; | 2421 | return -EINVAL; |
2417 | 2422 | ||
2418 | sbi->s_inode_readahead_blks = t; | 2423 | sbi->s_inode_readahead_blks = t; |
@@ -3095,14 +3100,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3095 | } | 3100 | } |
3096 | if (def_mount_opts & EXT4_DEFM_UID16) | 3101 | if (def_mount_opts & EXT4_DEFM_UID16) |
3097 | set_opt(sb, NO_UID32); | 3102 | set_opt(sb, NO_UID32); |
3103 | /* xattr user namespace & acls are now defaulted on */ | ||
3098 | #ifdef CONFIG_EXT4_FS_XATTR | 3104 | #ifdef CONFIG_EXT4_FS_XATTR |
3099 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) | 3105 | set_opt(sb, XATTR_USER); |
3100 | set_opt(sb, XATTR_USER); | ||
3101 | #endif | 3106 | #endif |
3102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 3107 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
3103 | if (def_mount_opts & EXT4_DEFM_ACL) | 3108 | set_opt(sb, POSIX_ACL); |
3104 | set_opt(sb, POSIX_ACL); | ||
3105 | #endif | 3109 | #endif |
3110 | set_opt(sb, MBLK_IO_SUBMIT); | ||
3106 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) | 3111 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) |
3107 | set_opt(sb, JOURNAL_DATA); | 3112 | set_opt(sb, JOURNAL_DATA); |
3108 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) | 3113 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) |
@@ -3516,7 +3521,7 @@ no_journal: | |||
3516 | * concurrency isn't really necessary. Limit it to 1. | 3521 | * concurrency isn't really necessary. Limit it to 1. |
3517 | */ | 3522 | */ |
3518 | EXT4_SB(sb)->dio_unwritten_wq = | 3523 | EXT4_SB(sb)->dio_unwritten_wq = |
3519 | alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1); | 3524 | alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); |
3520 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3525 | if (!EXT4_SB(sb)->dio_unwritten_wq) { |
3521 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3526 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); |
3522 | goto failed_mount_wq; | 3527 | goto failed_mount_wq; |
@@ -3531,17 +3536,16 @@ no_journal: | |||
3531 | if (IS_ERR(root)) { | 3536 | if (IS_ERR(root)) { |
3532 | ext4_msg(sb, KERN_ERR, "get root inode failed"); | 3537 | ext4_msg(sb, KERN_ERR, "get root inode failed"); |
3533 | ret = PTR_ERR(root); | 3538 | ret = PTR_ERR(root); |
3539 | root = NULL; | ||
3534 | goto failed_mount4; | 3540 | goto failed_mount4; |
3535 | } | 3541 | } |
3536 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | 3542 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { |
3537 | iput(root); | ||
3538 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); | 3543 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); |
3539 | goto failed_mount4; | 3544 | goto failed_mount4; |
3540 | } | 3545 | } |
3541 | sb->s_root = d_alloc_root(root); | 3546 | sb->s_root = d_alloc_root(root); |
3542 | if (!sb->s_root) { | 3547 | if (!sb->s_root) { |
3543 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); | 3548 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); |
3544 | iput(root); | ||
3545 | ret = -ENOMEM; | 3549 | ret = -ENOMEM; |
3546 | goto failed_mount4; | 3550 | goto failed_mount4; |
3547 | } | 3551 | } |
@@ -3657,6 +3661,8 @@ cantfind_ext4: | |||
3657 | goto failed_mount; | 3661 | goto failed_mount; |
3658 | 3662 | ||
3659 | failed_mount4: | 3663 | failed_mount4: |
3664 | iput(root); | ||
3665 | sb->s_root = NULL; | ||
3660 | ext4_msg(sb, KERN_ERR, "mount failed"); | 3666 | ext4_msg(sb, KERN_ERR, "mount failed"); |
3661 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); | 3667 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); |
3662 | failed_mount_wq: | 3668 | failed_mount_wq: |
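The root-inode error handling above is consolidated: instead of an iput(root) at every failure site, root starts as NULL (set explicitly when iget fails) and is released once at the failed_mount4 label, which works because iput(), like free(), ignores a NULL argument. A user-space sketch of the single-release-point unwind, with free() playing the role of iput():

    #include <stdio.h>
    #include <stdlib.h>

    struct inode { int ok; };

    static struct inode *get_root(int ok)
    {
        struct inode *i = malloc(sizeof(*i));
        if (i)
            i->ok = ok;
        return i;
    }

    static int fill_super(int root_ok)
    {
        struct inode *root = NULL;
        int ret = -1;

        root = get_root(root_ok);
        if (!root)
            goto failed_mount4;
        if (!root->ok) {
            fprintf(stderr, "corrupt root inode, run e2fsck\n");
            goto failed_mount4;   /* no per-site release needed */
        }
        printf("mounted\n");
        free(root);
        return 0;

    failed_mount4:
        free(root);               /* single release point; free(NULL) is a no-op */
        fprintf(stderr, "mount failed\n");
        return ret;
    }

    int main(void)
    {
        return fill_super(0) ? EXIT_FAILURE : EXIT_SUCCESS;
    }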
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fc32176eee39..b545ca1c459c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -735,7 +735,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, | |||
735 | int offset = (char *)s->here - bs->bh->b_data; | 735 | int offset = (char *)s->here - bs->bh->b_data; |
736 | 736 | ||
737 | unlock_buffer(bs->bh); | 737 | unlock_buffer(bs->bh); |
738 | jbd2_journal_release_buffer(handle, bs->bh); | 738 | ext4_handle_release_buffer(handle, bs->bh); |
739 | if (ce) { | 739 | if (ce) { |
740 | mb_cache_entry_release(ce); | 740 | mb_cache_entry_release(ce); |
741 | ce = NULL; | 741 | ce = NULL; |
@@ -833,7 +833,7 @@ inserted: | |||
833 | new_bh = sb_getblk(sb, block); | 833 | new_bh = sb_getblk(sb, block); |
834 | if (!new_bh) { | 834 | if (!new_bh) { |
835 | getblk_failed: | 835 | getblk_failed: |
836 | ext4_free_blocks(handle, inode, 0, block, 1, | 836 | ext4_free_blocks(handle, inode, NULL, block, 1, |
837 | EXT4_FREE_BLOCKS_METADATA); | 837 | EXT4_FREE_BLOCKS_METADATA); |
838 | error = -EIO; | 838 | error = -EIO; |
839 | goto cleanup; | 839 | goto cleanup; |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 59c6e4956786..b5ed541fb137 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -176,6 +176,17 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) | |||
176 | } | 176 | } |
177 | 177 | ||
178 | /* | 178 | /* |
179 | * Remove the inode from the writeback list it is on. | ||
180 | */ | ||
181 | void inode_wb_list_del(struct inode *inode) | ||
182 | { | ||
183 | spin_lock(&inode_wb_list_lock); | ||
184 | list_del_init(&inode->i_wb_list); | ||
185 | spin_unlock(&inode_wb_list_lock); | ||
186 | } | ||
187 | |||
188 | |||
189 | /* | ||
179 | * Redirty an inode: set its when-it-was dirtied timestamp and move it to the | 190 | * Redirty an inode: set its when-it-was dirtied timestamp and move it to the |
180 | * furthest end of its superblock's dirty-inode list. | 191 | * furthest end of its superblock's dirty-inode list. |
181 | * | 192 | * |
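This hunk is part of the inode_lock break-up that runs through the rest of the diff: writeback list membership gets its own inode_wb_list_lock, and the new inode_wb_list_del() helper (used by evict() in fs/inode.c below) hides the locking from callers. The assert_spin_locked() calls added to redirty_tail(), requeue_io() and queue_io() document the same rule. A runnable analogue, with a pthread mutex standing in for the spinlock:

    #include <pthread.h>
    #include <stdio.h>

    struct list_node { struct list_node *prev, *next; };

    static pthread_mutex_t inode_wb_list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void list_del_init(struct list_node *n)
    {
        n->prev->next = n->next;
        n->next->prev = n->prev;
        n->prev = n->next = n;    /* re-initialise to "not on any list" */
    }

    /* Callers no longer take any lock themselves. */
    static void inode_wb_list_del(struct list_node *i_wb_list)
    {
        pthread_mutex_lock(&inode_wb_list_lock);
        list_del_init(i_wb_list);
        pthread_mutex_unlock(&inode_wb_list_lock);
    }

    int main(void)
    {
        struct list_node head, a;

        head.prev = head.next = &a;    /* one-element list */
        a.prev = a.next = &head;
        inode_wb_list_del(&a);
        printf("list empty: %d\n", head.next == &head);
        return 0;
    }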
@@ -188,6 +199,7 @@ static void redirty_tail(struct inode *inode) | |||
188 | { | 199 | { |
189 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 200 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; |
190 | 201 | ||
202 | assert_spin_locked(&inode_wb_list_lock); | ||
191 | if (!list_empty(&wb->b_dirty)) { | 203 | if (!list_empty(&wb->b_dirty)) { |
192 | struct inode *tail; | 204 | struct inode *tail; |
193 | 205 | ||
@@ -205,14 +217,17 @@ static void requeue_io(struct inode *inode) | |||
205 | { | 217 | { |
206 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 218 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; |
207 | 219 | ||
220 | assert_spin_locked(&inode_wb_list_lock); | ||
208 | list_move(&inode->i_wb_list, &wb->b_more_io); | 221 | list_move(&inode->i_wb_list, &wb->b_more_io); |
209 | } | 222 | } |
210 | 223 | ||
211 | static void inode_sync_complete(struct inode *inode) | 224 | static void inode_sync_complete(struct inode *inode) |
212 | { | 225 | { |
213 | /* | 226 | /* |
214 | * Prevent speculative execution through spin_unlock(&inode_lock); | 227 | * Prevent speculative execution through |
228 | * spin_unlock(&inode_wb_list_lock); | ||
215 | */ | 229 | */ |
230 | |||
216 | smp_mb(); | 231 | smp_mb(); |
217 | wake_up_bit(&inode->i_state, __I_SYNC); | 232 | wake_up_bit(&inode->i_state, __I_SYNC); |
218 | } | 233 | } |
@@ -286,6 +301,7 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
286 | */ | 301 | */ |
287 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) | 302 | static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) |
288 | { | 303 | { |
304 | assert_spin_locked(&inode_wb_list_lock); | ||
289 | list_splice_init(&wb->b_more_io, &wb->b_io); | 305 | list_splice_init(&wb->b_more_io, &wb->b_io); |
290 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); | 306 | move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); |
291 | } | 307 | } |
@@ -306,25 +322,25 @@ static void inode_wait_for_writeback(struct inode *inode) | |||
306 | wait_queue_head_t *wqh; | 322 | wait_queue_head_t *wqh; |
307 | 323 | ||
308 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); | 324 | wqh = bit_waitqueue(&inode->i_state, __I_SYNC); |
309 | while (inode->i_state & I_SYNC) { | 325 | while (inode->i_state & I_SYNC) { |
310 | spin_unlock(&inode_lock); | 326 | spin_unlock(&inode->i_lock); |
327 | spin_unlock(&inode_wb_list_lock); | ||
311 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); | 328 | __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); |
312 | spin_lock(&inode_lock); | 329 | spin_lock(&inode_wb_list_lock); |
330 | spin_lock(&inode->i_lock); | ||
313 | } | 331 | } |
314 | } | 332 | } |
315 | 333 | ||
316 | /* | 334 | /* |
317 | * Write out an inode's dirty pages. Called under inode_lock. Either the | 335 | * Write out an inode's dirty pages. Called under inode_wb_list_lock and |
318 | * caller has ref on the inode (either via __iget or via syscall against an fd) | 336 | * inode->i_lock. Either the caller has an active reference on the inode or |
319 | * or the inode has I_WILL_FREE set (via generic_forget_inode) | 337 | * the inode has I_WILL_FREE set. |
320 | * | 338 | * |
321 | * If `wait' is set, wait on the writeout. | 339 | * If `wait' is set, wait on the writeout. |
322 | * | 340 | * |
323 | * The whole writeout design is quite complex and fragile. We want to avoid | 341 | * The whole writeout design is quite complex and fragile. We want to avoid |
324 | * starvation of particular inodes when others are being redirtied, prevent | 342 | * starvation of particular inodes when others are being redirtied, prevent |
325 | * livelocks, etc. | 343 | * livelocks, etc. |
326 | * | ||
327 | * Called under inode_lock. | ||
328 | */ | 344 | */ |
329 | static int | 345 | static int |
330 | writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | 346 | writeback_single_inode(struct inode *inode, struct writeback_control *wbc) |
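inode_wait_for_writeback() now holds two locks instead of one, and both must be dropped, in reverse order, before sleeping on I_SYNC, then reacquired in the documented order (inode_wb_list_lock before inode->i_lock). A user-space analogue; the atomic flag and usleep() loop are crude stand-ins for the kernel's bit waitqueue:

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <unistd.h>

    #define I_SYNC 0x1u

    static pthread_mutex_t inode_wb_list_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;
    static atomic_uint i_state = I_SYNC;

    /* Called with both locks held; returns the same way. */
    static void inode_wait_for_writeback(void)
    {
        while (atomic_load(&i_state) & I_SYNC) {
            pthread_mutex_unlock(&i_lock);
            pthread_mutex_unlock(&inode_wb_list_lock);
            usleep(1000);                    /* stand-in for __wait_on_bit() */
            pthread_mutex_lock(&inode_wb_list_lock);
            pthread_mutex_lock(&i_lock);     /* reacquire in lock order */
        }
    }

    static void *writeback_done(void *arg)
    {
        (void)arg;
        atomic_fetch_and(&i_state, ~I_SYNC); /* inode_sync_complete() */
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_mutex_lock(&inode_wb_list_lock);
        pthread_mutex_lock(&i_lock);
        pthread_create(&t, NULL, writeback_done, NULL);
        inode_wait_for_writeback();
        pthread_mutex_unlock(&i_lock);
        pthread_mutex_unlock(&inode_wb_list_lock);
        pthread_join(t, NULL);
        puts("I_SYNC cleared");
        return 0;
    }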
@@ -333,6 +349,9 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
333 | unsigned dirty; | 349 | unsigned dirty; |
334 | int ret; | 350 | int ret; |
335 | 351 | ||
352 | assert_spin_locked(&inode_wb_list_lock); | ||
353 | assert_spin_locked(&inode->i_lock); | ||
354 | |||
336 | if (!atomic_read(&inode->i_count)) | 355 | if (!atomic_read(&inode->i_count)) |
337 | WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); | 356 | WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING))); |
338 | else | 357 | else |
@@ -363,7 +382,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
363 | /* Set I_SYNC, reset I_DIRTY_PAGES */ | 382 | /* Set I_SYNC, reset I_DIRTY_PAGES */ |
364 | inode->i_state |= I_SYNC; | 383 | inode->i_state |= I_SYNC; |
365 | inode->i_state &= ~I_DIRTY_PAGES; | 384 | inode->i_state &= ~I_DIRTY_PAGES; |
366 | spin_unlock(&inode_lock); | 385 | spin_unlock(&inode->i_lock); |
386 | spin_unlock(&inode_wb_list_lock); | ||
367 | 387 | ||
368 | ret = do_writepages(mapping, wbc); | 388 | ret = do_writepages(mapping, wbc); |
369 | 389 | ||
@@ -383,10 +403,10 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
383 | * due to delalloc, clear dirty metadata flags right before | 403 | * due to delalloc, clear dirty metadata flags right before |
384 | * write_inode() | 404 | * write_inode() |
385 | */ | 405 | */ |
386 | spin_lock(&inode_lock); | 406 | spin_lock(&inode->i_lock); |
387 | dirty = inode->i_state & I_DIRTY; | 407 | dirty = inode->i_state & I_DIRTY; |
388 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); | 408 | inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); |
389 | spin_unlock(&inode_lock); | 409 | spin_unlock(&inode->i_lock); |
390 | /* Don't write the inode if only I_DIRTY_PAGES was set */ | 410 | /* Don't write the inode if only I_DIRTY_PAGES was set */ |
391 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { | 411 | if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { |
392 | int err = write_inode(inode, wbc); | 412 | int err = write_inode(inode, wbc); |
@@ -394,7 +414,8 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
394 | ret = err; | 414 | ret = err; |
395 | } | 415 | } |
396 | 416 | ||
397 | spin_lock(&inode_lock); | 417 | spin_lock(&inode_wb_list_lock); |
418 | spin_lock(&inode->i_lock); | ||
398 | inode->i_state &= ~I_SYNC; | 419 | inode->i_state &= ~I_SYNC; |
399 | if (!(inode->i_state & I_FREEING)) { | 420 | if (!(inode->i_state & I_FREEING)) { |
400 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 421 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { |
@@ -506,7 +527,9 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
506 | * kind does not need periodic writeout yet, and for the latter | 527 | * kind does not need periodic writeout yet, and for the latter |
507 | * kind writeout is handled by the freer. | 528 | * kind writeout is handled by the freer. |
508 | */ | 529 | */ |
530 | spin_lock(&inode->i_lock); | ||
509 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { | 531 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { |
532 | spin_unlock(&inode->i_lock); | ||
510 | requeue_io(inode); | 533 | requeue_io(inode); |
511 | continue; | 534 | continue; |
512 | } | 535 | } |
@@ -515,10 +538,13 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
515 | * Was this inode dirtied after sync_sb_inodes was called? | 538 | * Was this inode dirtied after sync_sb_inodes was called? |
516 | * This keeps sync from extra jobs and livelock. | 539 | * This keeps sync from extra jobs and livelock. |
517 | */ | 540 | */ |
518 | if (inode_dirtied_after(inode, wbc->wb_start)) | 541 | if (inode_dirtied_after(inode, wbc->wb_start)) { |
542 | spin_unlock(&inode->i_lock); | ||
519 | return 1; | 543 | return 1; |
544 | } | ||
520 | 545 | ||
521 | __iget(inode); | 546 | __iget(inode); |
547 | |||
522 | pages_skipped = wbc->pages_skipped; | 548 | pages_skipped = wbc->pages_skipped; |
523 | writeback_single_inode(inode, wbc); | 549 | writeback_single_inode(inode, wbc); |
524 | if (wbc->pages_skipped != pages_skipped) { | 550 | if (wbc->pages_skipped != pages_skipped) { |
@@ -528,10 +554,11 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
528 | */ | 554 | */ |
529 | redirty_tail(inode); | 555 | redirty_tail(inode); |
530 | } | 556 | } |
531 | spin_unlock(&inode_lock); | 557 | spin_unlock(&inode->i_lock); |
558 | spin_unlock(&inode_wb_list_lock); | ||
532 | iput(inode); | 559 | iput(inode); |
533 | cond_resched(); | 560 | cond_resched(); |
534 | spin_lock(&inode_lock); | 561 | spin_lock(&inode_wb_list_lock); |
535 | if (wbc->nr_to_write <= 0) { | 562 | if (wbc->nr_to_write <= 0) { |
536 | wbc->more_io = 1; | 563 | wbc->more_io = 1; |
537 | return 1; | 564 | return 1; |
@@ -550,7 +577,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb, | |||
550 | 577 | ||
551 | if (!wbc->wb_start) | 578 | if (!wbc->wb_start) |
552 | wbc->wb_start = jiffies; /* livelock avoidance */ | 579 | wbc->wb_start = jiffies; /* livelock avoidance */ |
553 | spin_lock(&inode_lock); | 580 | spin_lock(&inode_wb_list_lock); |
554 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 581 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
555 | queue_io(wb, wbc->older_than_this); | 582 | queue_io(wb, wbc->older_than_this); |
556 | 583 | ||
@@ -568,7 +595,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb, | |||
568 | if (ret) | 595 | if (ret) |
569 | break; | 596 | break; |
570 | } | 597 | } |
571 | spin_unlock(&inode_lock); | 598 | spin_unlock(&inode_wb_list_lock); |
572 | /* Leave any unwritten inodes on b_io */ | 599 | /* Leave any unwritten inodes on b_io */ |
573 | } | 600 | } |
574 | 601 | ||
@@ -577,11 +604,11 @@ static void __writeback_inodes_sb(struct super_block *sb, | |||
577 | { | 604 | { |
578 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 605 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
579 | 606 | ||
580 | spin_lock(&inode_lock); | 607 | spin_lock(&inode_wb_list_lock); |
581 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) | 608 | if (!wbc->for_kupdate || list_empty(&wb->b_io)) |
582 | queue_io(wb, wbc->older_than_this); | 609 | queue_io(wb, wbc->older_than_this); |
583 | writeback_sb_inodes(sb, wb, wbc, true); | 610 | writeback_sb_inodes(sb, wb, wbc, true); |
584 | spin_unlock(&inode_lock); | 611 | spin_unlock(&inode_wb_list_lock); |
585 | } | 612 | } |
586 | 613 | ||
587 | /* | 614 | /* |
@@ -720,13 +747,15 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
720 | * become available for writeback. Otherwise | 747 | * become available for writeback. Otherwise |
721 | * we'll just busyloop. | 748 | * we'll just busyloop. |
722 | */ | 749 | */ |
723 | spin_lock(&inode_lock); | 750 | spin_lock(&inode_wb_list_lock); |
724 | if (!list_empty(&wb->b_more_io)) { | 751 | if (!list_empty(&wb->b_more_io)) { |
725 | inode = wb_inode(wb->b_more_io.prev); | 752 | inode = wb_inode(wb->b_more_io.prev); |
726 | trace_wbc_writeback_wait(&wbc, wb->bdi); | 753 | trace_wbc_writeback_wait(&wbc, wb->bdi); |
754 | spin_lock(&inode->i_lock); | ||
727 | inode_wait_for_writeback(inode); | 755 | inode_wait_for_writeback(inode); |
756 | spin_unlock(&inode->i_lock); | ||
728 | } | 757 | } |
729 | spin_unlock(&inode_lock); | 758 | spin_unlock(&inode_wb_list_lock); |
730 | } | 759 | } |
731 | 760 | ||
732 | return wrote; | 761 | return wrote; |
@@ -992,7 +1021,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
992 | { | 1021 | { |
993 | struct super_block *sb = inode->i_sb; | 1022 | struct super_block *sb = inode->i_sb; |
994 | struct backing_dev_info *bdi = NULL; | 1023 | struct backing_dev_info *bdi = NULL; |
995 | bool wakeup_bdi = false; | ||
996 | 1024 | ||
997 | /* | 1025 | /* |
998 | * Don't do this for I_DIRTY_PAGES - that doesn't actually | 1026 | * Don't do this for I_DIRTY_PAGES - that doesn't actually |
@@ -1016,7 +1044,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1016 | if (unlikely(block_dump)) | 1044 | if (unlikely(block_dump)) |
1017 | block_dump___mark_inode_dirty(inode); | 1045 | block_dump___mark_inode_dirty(inode); |
1018 | 1046 | ||
1019 | spin_lock(&inode_lock); | 1047 | spin_lock(&inode->i_lock); |
1020 | if ((inode->i_state & flags) != flags) { | 1048 | if ((inode->i_state & flags) != flags) { |
1021 | const int was_dirty = inode->i_state & I_DIRTY; | 1049 | const int was_dirty = inode->i_state & I_DIRTY; |
1022 | 1050 | ||
@@ -1028,7 +1056,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1028 | * superblock list, based upon its state. | 1056 | * superblock list, based upon its state. |
1029 | */ | 1057 | */ |
1030 | if (inode->i_state & I_SYNC) | 1058 | if (inode->i_state & I_SYNC) |
1031 | goto out; | 1059 | goto out_unlock_inode; |
1032 | 1060 | ||
1033 | /* | 1061 | /* |
1034 | * Only add valid (hashed) inodes to the superblock's | 1062 | * Only add valid (hashed) inodes to the superblock's |
@@ -1036,16 +1064,17 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1036 | */ | 1064 | */ |
1037 | if (!S_ISBLK(inode->i_mode)) { | 1065 | if (!S_ISBLK(inode->i_mode)) { |
1038 | if (inode_unhashed(inode)) | 1066 | if (inode_unhashed(inode)) |
1039 | goto out; | 1067 | goto out_unlock_inode; |
1040 | } | 1068 | } |
1041 | if (inode->i_state & I_FREEING) | 1069 | if (inode->i_state & I_FREEING) |
1042 | goto out; | 1070 | goto out_unlock_inode; |
1043 | 1071 | ||
1044 | /* | 1072 | /* |
1045 | * If the inode was already on b_dirty/b_io/b_more_io, don't | 1073 | * If the inode was already on b_dirty/b_io/b_more_io, don't |
1046 | * reposition it (that would break b_dirty time-ordering). | 1074 | * reposition it (that would break b_dirty time-ordering). |
1047 | */ | 1075 | */ |
1048 | if (!was_dirty) { | 1076 | if (!was_dirty) { |
1077 | bool wakeup_bdi = false; | ||
1049 | bdi = inode_to_bdi(inode); | 1078 | bdi = inode_to_bdi(inode); |
1050 | 1079 | ||
1051 | if (bdi_cap_writeback_dirty(bdi)) { | 1080 | if (bdi_cap_writeback_dirty(bdi)) { |
@@ -1062,15 +1091,20 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1062 | wakeup_bdi = true; | 1091 | wakeup_bdi = true; |
1063 | } | 1092 | } |
1064 | 1093 | ||
1094 | spin_unlock(&inode->i_lock); | ||
1095 | spin_lock(&inode_wb_list_lock); | ||
1065 | inode->dirtied_when = jiffies; | 1096 | inode->dirtied_when = jiffies; |
1066 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); | 1097 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); |
1098 | spin_unlock(&inode_wb_list_lock); | ||
1099 | |||
1100 | if (wakeup_bdi) | ||
1101 | bdi_wakeup_thread_delayed(bdi); | ||
1102 | return; | ||
1067 | } | 1103 | } |
1068 | } | 1104 | } |
1069 | out: | 1105 | out_unlock_inode: |
1070 | spin_unlock(&inode_lock); | 1106 | spin_unlock(&inode->i_lock); |
1071 | 1107 | ||
1072 | if (wakeup_bdi) | ||
1073 | bdi_wakeup_thread_delayed(bdi); | ||
1074 | } | 1108 | } |
1075 | EXPORT_SYMBOL(__mark_inode_dirty); | 1109 | EXPORT_SYMBOL(__mark_inode_dirty); |
1076 | 1110 | ||
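The tail of __mark_inode_dirty() is restructured around the new lock order: the function runs under inode->i_lock, and since inode_wb_list_lock nests outside i_lock, the per-inode lock has to be released before the dirty-list move; wakeup_bdi consequently becomes local to the !was_dirty branch. A sketch of that handoff:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t inode_wb_list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void mark_inode_dirty(bool was_dirty)
    {
        pthread_mutex_lock(&i_lock);
        /* ... i_state updates happen under i_lock ... */
        if (!was_dirty) {
            bool wakeup_bdi = true;   /* assumption: bdi needs a kick */

            /* Documented order is inode_wb_list_lock -> i_lock, so
             * i_lock must be dropped before the list lock is taken. */
            pthread_mutex_unlock(&i_lock);
            pthread_mutex_lock(&inode_wb_list_lock);
            /* ... list_move(&inode->i_wb_list, &bdi->wb.b_dirty) ... */
            pthread_mutex_unlock(&inode_wb_list_lock);

            if (wakeup_bdi)
                puts("bdi_wakeup_thread_delayed(bdi)");
            return;
        }
        pthread_mutex_unlock(&i_lock);   /* out_unlock_inode: */
    }

    int main(void)
    {
        mark_inode_dirty(false);
        mark_inode_dirty(true);
        return 0;
    }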
@@ -1101,7 +1135,7 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1101 | */ | 1135 | */ |
1102 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 1136 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
1103 | 1137 | ||
1104 | spin_lock(&inode_lock); | 1138 | spin_lock(&inode_sb_list_lock); |
1105 | 1139 | ||
1106 | /* | 1140 | /* |
1107 | * Data integrity sync. Must wait for all pages under writeback, | 1141 | * Data integrity sync. Must wait for all pages under writeback, |
@@ -1111,22 +1145,25 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1111 | * we still have to wait for that writeout. | 1145 | * we still have to wait for that writeout. |
1112 | */ | 1146 | */ |
1113 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 1147 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
1114 | struct address_space *mapping; | 1148 | struct address_space *mapping = inode->i_mapping; |
1115 | 1149 | ||
1116 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) | 1150 | spin_lock(&inode->i_lock); |
1117 | continue; | 1151 | if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || |
1118 | mapping = inode->i_mapping; | 1152 | (mapping->nrpages == 0)) { |
1119 | if (mapping->nrpages == 0) | 1153 | spin_unlock(&inode->i_lock); |
1120 | continue; | 1154 | continue; |
1155 | } | ||
1121 | __iget(inode); | 1156 | __iget(inode); |
1122 | spin_unlock(&inode_lock); | 1157 | spin_unlock(&inode->i_lock); |
1158 | spin_unlock(&inode_sb_list_lock); | ||
1159 | |||
1123 | /* | 1160 | /* |
1124 | * We hold a reference to 'inode' so it couldn't have | 1161 | * We hold a reference to 'inode' so it couldn't have been |
1125 | * been removed from s_inodes list while we dropped the | 1162 | * removed from s_inodes list while we dropped the |
1126 | * inode_lock. We cannot iput the inode now as we can | 1163 | * inode_sb_list_lock. We cannot iput the inode now as we can |
1127 | * be holding the last reference and we cannot iput it | 1164 | * be holding the last reference and we cannot iput it under |
1128 | * under inode_lock. So we keep the reference and iput | 1165 | * inode_sb_list_lock. So we keep the reference and iput it |
1129 | * it later. | 1166 | * later. |
1130 | */ | 1167 | */ |
1131 | iput(old_inode); | 1168 | iput(old_inode); |
1132 | old_inode = inode; | 1169 | old_inode = inode; |
@@ -1135,9 +1172,9 @@ static void wait_sb_inodes(struct super_block *sb) | |||
1135 | 1172 | ||
1136 | cond_resched(); | 1173 | cond_resched(); |
1137 | 1174 | ||
1138 | spin_lock(&inode_lock); | 1175 | spin_lock(&inode_sb_list_lock); |
1139 | } | 1176 | } |
1140 | spin_unlock(&inode_lock); | 1177 | spin_unlock(&inode_sb_list_lock); |
1141 | iput(old_inode); | 1178 | iput(old_inode); |
1142 | } | 1179 | } |
1143 | 1180 | ||
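wait_sb_inodes() keeps its deferred-iput trick but swaps locks: the reference is taken under the new per-inode i_lock, inode_sb_list_lock is dropped around the blocking page wait, and the previous inode's iput() always runs with no list lock held. A hedged sketch of the walk, modelling refcounts as plain integers and sb->s_inodes as an array:

    #include <pthread.h>
    #include <stdio.h>

    struct inode {
        pthread_mutex_t i_lock;
        int i_count;
    };

    static pthread_mutex_t inode_sb_list_lock = PTHREAD_MUTEX_INITIALIZER;

    static void iput(struct inode *inode)
    {
        if (inode)
            inode->i_count--;         /* may drop the final reference */
    }

    static void wait_sb_inodes(struct inode *inodes, int n)
    {
        struct inode *old_inode = NULL;
        int i;

        pthread_mutex_lock(&inode_sb_list_lock);
        for (i = 0; i < n; i++) {
            struct inode *inode = &inodes[i];

            pthread_mutex_lock(&inode->i_lock);
            inode->i_count++;         /* __iget() under i_lock */
            pthread_mutex_unlock(&inode->i_lock);
            pthread_mutex_unlock(&inode_sb_list_lock);

            /* The previous iput() is deferred to here so it never
             * runs under inode_sb_list_lock. */
            iput(old_inode);
            old_inode = inode;

            /* ... filemap_fdatawait(inode->i_mapping) would block here ... */

            pthread_mutex_lock(&inode_sb_list_lock);
        }
        pthread_mutex_unlock(&inode_sb_list_lock);
        iput(old_inode);
    }

    int main(void)
    {
        struct inode inodes[2] = {
            { PTHREAD_MUTEX_INITIALIZER, 1 },
            { PTHREAD_MUTEX_INITIALIZER, 1 },
        };

        wait_sb_inodes(inodes, 2);
        printf("counts: %d %d\n", inodes[0].i_count, inodes[1].i_count);
        return 0;
    }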
@@ -1271,9 +1308,11 @@ int write_inode_now(struct inode *inode, int sync) | |||
1271 | wbc.nr_to_write = 0; | 1308 | wbc.nr_to_write = 0; |
1272 | 1309 | ||
1273 | might_sleep(); | 1310 | might_sleep(); |
1274 | spin_lock(&inode_lock); | 1311 | spin_lock(&inode_wb_list_lock); |
1312 | spin_lock(&inode->i_lock); | ||
1275 | ret = writeback_single_inode(inode, &wbc); | 1313 | ret = writeback_single_inode(inode, &wbc); |
1276 | spin_unlock(&inode_lock); | 1314 | spin_unlock(&inode->i_lock); |
1315 | spin_unlock(&inode_wb_list_lock); | ||
1277 | if (sync) | 1316 | if (sync) |
1278 | inode_sync_wait(inode); | 1317 | inode_sync_wait(inode); |
1279 | return ret; | 1318 | return ret; |
@@ -1295,9 +1334,11 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) | |||
1295 | { | 1334 | { |
1296 | int ret; | 1335 | int ret; |
1297 | 1336 | ||
1298 | spin_lock(&inode_lock); | 1337 | spin_lock(&inode_wb_list_lock); |
1338 | spin_lock(&inode->i_lock); | ||
1299 | ret = writeback_single_inode(inode, wbc); | 1339 | ret = writeback_single_inode(inode, wbc); |
1300 | spin_unlock(&inode_lock); | 1340 | spin_unlock(&inode->i_lock); |
1341 | spin_unlock(&inode_wb_list_lock); | ||
1301 | return ret; | 1342 | return ret; |
1302 | } | 1343 | } |
1303 | EXPORT_SYMBOL(sync_inode); | 1344 | EXPORT_SYMBOL(sync_inode); |
diff --git a/fs/inode.c b/fs/inode.c index 0b3da4a77704..5f4e11aaeb5c 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -26,6 +26,38 @@ | |||
26 | #include <linux/posix_acl.h> | 26 | #include <linux/posix_acl.h> |
27 | #include <linux/ima.h> | 27 | #include <linux/ima.h> |
28 | #include <linux/cred.h> | 28 | #include <linux/cred.h> |
29 | #include "internal.h" | ||
30 | |||
31 | /* | ||
32 | * inode locking rules. | ||
33 | * | ||
34 | * inode->i_lock protects: | ||
35 | * inode->i_state, inode->i_hash, __iget() | ||
36 | * inode_lru_lock protects: | ||
37 | * inode_lru, inode->i_lru | ||
38 | * inode_sb_list_lock protects: | ||
39 | * sb->s_inodes, inode->i_sb_list | ||
40 | * inode_wb_list_lock protects: | ||
41 | * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list | ||
42 | * inode_hash_lock protects: | ||
43 | * inode_hashtable, inode->i_hash | ||
44 | * | ||
45 | * Lock ordering: | ||
46 | * | ||
47 | * inode_sb_list_lock | ||
48 | * inode->i_lock | ||
49 | * inode_lru_lock | ||
50 | * | ||
51 | * inode_wb_list_lock | ||
52 | * inode->i_lock | ||
53 | * | ||
54 | * inode_hash_lock | ||
55 | * inode_sb_list_lock | ||
56 | * inode->i_lock | ||
57 | * | ||
58 | * iunique_lock | ||
59 | * inode_hash_lock | ||
60 | */ | ||
29 | 61 | ||
30 | /* | 62 | /* |
31 | * This is needed for the following functions: | 63 | * This is needed for the following functions: |
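The comment block added above is the contract for the whole series: the single inode_lock is replaced by five narrower locks with an explicit nesting order. A toy enforcement of one chain (inode_sb_list_lock -> inode->i_lock) in the spirit of what lockdep verifies in the kernel; the rank scheme is purely illustrative, not kernel code:

    #include <assert.h>
    #include <pthread.h>
    #include <stdio.h>

    struct ranked_lock {
        pthread_mutex_t m;
        int rank;
    };

    static _Thread_local int held_rank;   /* highest rank currently held */

    static void rlock(struct ranked_lock *l)
    {
        assert(l->rank > held_rank);      /* enforce strict ordering */
        pthread_mutex_lock(&l->m);
        held_rank = l->rank;
    }

    static void runlock(struct ranked_lock *l)
    {
        held_rank = l->rank - 1;          /* simplification: LIFO nesting only */
        pthread_mutex_unlock(&l->m);
    }

    int main(void)
    {
        struct ranked_lock sb_list = { PTHREAD_MUTEX_INITIALIZER, 1 };
        struct ranked_lock i_lock  = { PTHREAD_MUTEX_INITIALIZER, 2 };

        rlock(&sb_list);                  /* inode_sb_list_lock */
        rlock(&i_lock);                   /* then inode->i_lock: OK */
        runlock(&i_lock);
        runlock(&sb_list);
        puts("ordering respected");
        return 0;                         /* taking them inverted would assert */
    }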
@@ -60,6 +92,8 @@ | |||
60 | 92 | ||
61 | static unsigned int i_hash_mask __read_mostly; | 93 | static unsigned int i_hash_mask __read_mostly; |
62 | static unsigned int i_hash_shift __read_mostly; | 94 | static unsigned int i_hash_shift __read_mostly; |
95 | static struct hlist_head *inode_hashtable __read_mostly; | ||
96 | static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); | ||
63 | 97 | ||
64 | /* | 98 | /* |
65 | * Each inode can be on two separate lists. One is | 99 | * Each inode can be on two separate lists. One is |
@@ -74,15 +108,10 @@ static unsigned int i_hash_shift __read_mostly; | |||
74 | */ | 108 | */ |
75 | 109 | ||
76 | static LIST_HEAD(inode_lru); | 110 | static LIST_HEAD(inode_lru); |
77 | static struct hlist_head *inode_hashtable __read_mostly; | 111 | static DEFINE_SPINLOCK(inode_lru_lock); |
78 | 112 | ||
79 | /* | 113 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); |
80 | * A simple spinlock to protect the list manipulations. | 114 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); |
81 | * | ||
82 | * NOTE! You also have to own the lock if you change | ||
83 | * the i_state of an inode while it is in use.. | ||
84 | */ | ||
85 | DEFINE_SPINLOCK(inode_lock); | ||
86 | 115 | ||
87 | /* | 116 | /* |
88 | * iprune_sem provides exclusion between the icache shrinking and the | 117 | * iprune_sem provides exclusion between the icache shrinking and the |
@@ -137,15 +166,6 @@ int proc_nr_inodes(ctl_table *table, int write, | |||
137 | } | 166 | } |
138 | #endif | 167 | #endif |
139 | 168 | ||
140 | static void wake_up_inode(struct inode *inode) | ||
141 | { | ||
142 | /* | ||
143 | * Prevent speculative execution through spin_unlock(&inode_lock); | ||
144 | */ | ||
145 | smp_mb(); | ||
146 | wake_up_bit(&inode->i_state, __I_NEW); | ||
147 | } | ||
148 | |||
149 | /** | 169 | /** |
150 | * inode_init_always - perform inode structure initialisation | 170 | * inode_init_always - perform inode structure initialisation |
151 | * @sb: superblock inode belongs to | 171 | * @sb: superblock inode belongs to |
@@ -336,7 +356,7 @@ static void init_once(void *foo) | |||
336 | } | 356 | } |
337 | 357 | ||
338 | /* | 358 | /* |
339 | * inode_lock must be held | 359 | * inode->i_lock must be held |
340 | */ | 360 | */ |
341 | void __iget(struct inode *inode) | 361 | void __iget(struct inode *inode) |
342 | { | 362 | { |
@@ -354,23 +374,22 @@ EXPORT_SYMBOL(ihold); | |||
354 | 374 | ||
355 | static void inode_lru_list_add(struct inode *inode) | 375 | static void inode_lru_list_add(struct inode *inode) |
356 | { | 376 | { |
377 | spin_lock(&inode_lru_lock); | ||
357 | if (list_empty(&inode->i_lru)) { | 378 | if (list_empty(&inode->i_lru)) { |
358 | list_add(&inode->i_lru, &inode_lru); | 379 | list_add(&inode->i_lru, &inode_lru); |
359 | inodes_stat.nr_unused++; | 380 | inodes_stat.nr_unused++; |
360 | } | 381 | } |
382 | spin_unlock(&inode_lru_lock); | ||
361 | } | 383 | } |
362 | 384 | ||
363 | static void inode_lru_list_del(struct inode *inode) | 385 | static void inode_lru_list_del(struct inode *inode) |
364 | { | 386 | { |
387 | spin_lock(&inode_lru_lock); | ||
365 | if (!list_empty(&inode->i_lru)) { | 388 | if (!list_empty(&inode->i_lru)) { |
366 | list_del_init(&inode->i_lru); | 389 | list_del_init(&inode->i_lru); |
367 | inodes_stat.nr_unused--; | 390 | inodes_stat.nr_unused--; |
368 | } | 391 | } |
369 | } | 392 | spin_unlock(&inode_lru_lock); |
370 | |||
371 | static inline void __inode_sb_list_add(struct inode *inode) | ||
372 | { | ||
373 | list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); | ||
374 | } | 393 | } |
375 | 394 | ||
376 | /** | 395 | /** |
@@ -379,15 +398,17 @@ static inline void __inode_sb_list_add(struct inode *inode) | |||
379 | */ | 398 | */ |
380 | void inode_sb_list_add(struct inode *inode) | 399 | void inode_sb_list_add(struct inode *inode) |
381 | { | 400 | { |
382 | spin_lock(&inode_lock); | 401 | spin_lock(&inode_sb_list_lock); |
383 | __inode_sb_list_add(inode); | 402 | list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); |
384 | spin_unlock(&inode_lock); | 403 | spin_unlock(&inode_sb_list_lock); |
385 | } | 404 | } |
386 | EXPORT_SYMBOL_GPL(inode_sb_list_add); | 405 | EXPORT_SYMBOL_GPL(inode_sb_list_add); |
387 | 406 | ||
388 | static inline void __inode_sb_list_del(struct inode *inode) | 407 | static inline void inode_sb_list_del(struct inode *inode) |
389 | { | 408 | { |
409 | spin_lock(&inode_sb_list_lock); | ||
390 | list_del_init(&inode->i_sb_list); | 410 | list_del_init(&inode->i_sb_list); |
411 | spin_unlock(&inode_sb_list_lock); | ||
391 | } | 412 | } |
392 | 413 | ||
393 | static unsigned long hash(struct super_block *sb, unsigned long hashval) | 414 | static unsigned long hash(struct super_block *sb, unsigned long hashval) |
@@ -412,24 +433,15 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) | |||
412 | { | 433 | { |
413 | struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); | 434 | struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); |
414 | 435 | ||
415 | spin_lock(&inode_lock); | 436 | spin_lock(&inode_hash_lock); |
437 | spin_lock(&inode->i_lock); | ||
416 | hlist_add_head(&inode->i_hash, b); | 438 | hlist_add_head(&inode->i_hash, b); |
417 | spin_unlock(&inode_lock); | 439 | spin_unlock(&inode->i_lock); |
440 | spin_unlock(&inode_hash_lock); | ||
418 | } | 441 | } |
419 | EXPORT_SYMBOL(__insert_inode_hash); | 442 | EXPORT_SYMBOL(__insert_inode_hash); |
420 | 443 | ||
421 | /** | 444 | /** |
422 | * __remove_inode_hash - remove an inode from the hash | ||
423 | * @inode: inode to unhash | ||
424 | * | ||
425 | * Remove an inode from the superblock. | ||
426 | */ | ||
427 | static void __remove_inode_hash(struct inode *inode) | ||
428 | { | ||
429 | hlist_del_init(&inode->i_hash); | ||
430 | } | ||
431 | |||
432 | /** | ||
433 | * remove_inode_hash - remove an inode from the hash | 445 | * remove_inode_hash - remove an inode from the hash |
434 | * @inode: inode to unhash | 446 | * @inode: inode to unhash |
435 | * | 447 | * |
@@ -437,9 +449,11 @@ static void __remove_inode_hash(struct inode *inode) | |||
437 | */ | 449 | */ |
438 | void remove_inode_hash(struct inode *inode) | 450 | void remove_inode_hash(struct inode *inode) |
439 | { | 451 | { |
440 | spin_lock(&inode_lock); | 452 | spin_lock(&inode_hash_lock); |
453 | spin_lock(&inode->i_lock); | ||
441 | hlist_del_init(&inode->i_hash); | 454 | hlist_del_init(&inode->i_hash); |
442 | spin_unlock(&inode_lock); | 455 | spin_unlock(&inode->i_lock); |
456 | spin_unlock(&inode_hash_lock); | ||
443 | } | 457 | } |
444 | EXPORT_SYMBOL(remove_inode_hash); | 458 | EXPORT_SYMBOL(remove_inode_hash); |
445 | 459 | ||
@@ -456,10 +470,29 @@ void end_writeback(struct inode *inode) | |||
456 | } | 470 | } |
457 | EXPORT_SYMBOL(end_writeback); | 471 | EXPORT_SYMBOL(end_writeback); |
458 | 472 | ||
473 | /* | ||
474 | * Free the inode passed in, removing it from the lists it is still connected | ||
475 | * to. We remove any pages still attached to the inode and wait for any IO that | ||
476 | * is still in progress before finally destroying the inode. | ||
477 | * | ||
478 | * An inode must already be marked I_FREEING so that we avoid the inode being | ||
479 | * moved back onto lists if we race with other code that manipulates the lists | ||
480 | * (e.g. writeback_single_inode). The caller is responsible for setting this. | ||
481 | * | ||
482 | * An inode must already be removed from the LRU list before being evicted from | ||
483 | * the cache. This should occur atomically with setting the I_FREEING state | ||
484 | * flag, so no inodes here should ever be on the LRU when being evicted. | ||
485 | */ | ||
459 | static void evict(struct inode *inode) | 486 | static void evict(struct inode *inode) |
460 | { | 487 | { |
461 | const struct super_operations *op = inode->i_sb->s_op; | 488 | const struct super_operations *op = inode->i_sb->s_op; |
462 | 489 | ||
490 | BUG_ON(!(inode->i_state & I_FREEING)); | ||
491 | BUG_ON(!list_empty(&inode->i_lru)); | ||
492 | |||
493 | inode_wb_list_del(inode); | ||
494 | inode_sb_list_del(inode); | ||
495 | |||
463 | if (op->evict_inode) { | 496 | if (op->evict_inode) { |
464 | op->evict_inode(inode); | 497 | op->evict_inode(inode); |
465 | } else { | 498 | } else { |
@@ -471,6 +504,15 @@ static void evict(struct inode *inode) | |||
471 | bd_forget(inode); | 504 | bd_forget(inode); |
472 | if (S_ISCHR(inode->i_mode) && inode->i_cdev) | 505 | if (S_ISCHR(inode->i_mode) && inode->i_cdev) |
473 | cd_forget(inode); | 506 | cd_forget(inode); |
507 | |||
508 | remove_inode_hash(inode); | ||
509 | |||
510 | spin_lock(&inode->i_lock); | ||
511 | wake_up_bit(&inode->i_state, __I_NEW); | ||
512 | BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); | ||
513 | spin_unlock(&inode->i_lock); | ||
514 | |||
515 | destroy_inode(inode); | ||
474 | } | 516 | } |
475 | 517 | ||
476 | /* | 518 | /* |
@@ -489,14 +531,6 @@ static void dispose_list(struct list_head *head) | |||
489 | list_del_init(&inode->i_lru); | 531 | list_del_init(&inode->i_lru); |
490 | 532 | ||
491 | evict(inode); | 533 | evict(inode); |
492 | |||
493 | spin_lock(&inode_lock); | ||
494 | __remove_inode_hash(inode); | ||
495 | __inode_sb_list_del(inode); | ||
496 | spin_unlock(&inode_lock); | ||
497 | |||
498 | wake_up_inode(inode); | ||
499 | destroy_inode(inode); | ||
500 | } | 534 | } |
501 | } | 535 | } |
502 | 536 | ||
@@ -514,25 +548,23 @@ void evict_inodes(struct super_block *sb) | |||
514 | struct inode *inode, *next; | 548 | struct inode *inode, *next; |
515 | LIST_HEAD(dispose); | 549 | LIST_HEAD(dispose); |
516 | 550 | ||
517 | spin_lock(&inode_lock); | 551 | spin_lock(&inode_sb_list_lock); |
518 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { | 552 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { |
519 | if (atomic_read(&inode->i_count)) | 553 | if (atomic_read(&inode->i_count)) |
520 | continue; | 554 | continue; |
521 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) | 555 | |
556 | spin_lock(&inode->i_lock); | ||
557 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { | ||
558 | spin_unlock(&inode->i_lock); | ||
522 | continue; | 559 | continue; |
560 | } | ||
523 | 561 | ||
524 | inode->i_state |= I_FREEING; | 562 | inode->i_state |= I_FREEING; |
525 | 563 | inode_lru_list_del(inode); | |
526 | /* | 564 | spin_unlock(&inode->i_lock); |
527 | * Move the inode off the IO lists and LRU once I_FREEING is | 565 | list_add(&inode->i_lru, &dispose); |
528 | * set so that it won't get moved back on there if it is dirty. | ||
529 | */ | ||
530 | list_move(&inode->i_lru, &dispose); | ||
531 | list_del_init(&inode->i_wb_list); | ||
532 | if (!(inode->i_state & (I_DIRTY | I_SYNC))) | ||
533 | inodes_stat.nr_unused--; | ||
534 | } | 566 | } |
535 | spin_unlock(&inode_lock); | 567 | spin_unlock(&inode_sb_list_lock); |
536 | 568 | ||
537 | dispose_list(&dispose); | 569 | dispose_list(&dispose); |
538 | 570 | ||
@@ -561,31 +593,30 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) | |||
561 | struct inode *inode, *next; | 593 | struct inode *inode, *next; |
562 | LIST_HEAD(dispose); | 594 | LIST_HEAD(dispose); |
563 | 595 | ||
564 | spin_lock(&inode_lock); | 596 | spin_lock(&inode_sb_list_lock); |
565 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { | 597 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { |
566 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) | 598 | spin_lock(&inode->i_lock); |
599 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { | ||
600 | spin_unlock(&inode->i_lock); | ||
567 | continue; | 601 | continue; |
602 | } | ||
568 | if (inode->i_state & I_DIRTY && !kill_dirty) { | 603 | if (inode->i_state & I_DIRTY && !kill_dirty) { |
604 | spin_unlock(&inode->i_lock); | ||
569 | busy = 1; | 605 | busy = 1; |
570 | continue; | 606 | continue; |
571 | } | 607 | } |
572 | if (atomic_read(&inode->i_count)) { | 608 | if (atomic_read(&inode->i_count)) { |
609 | spin_unlock(&inode->i_lock); | ||
573 | busy = 1; | 610 | busy = 1; |
574 | continue; | 611 | continue; |
575 | } | 612 | } |
576 | 613 | ||
577 | inode->i_state |= I_FREEING; | 614 | inode->i_state |= I_FREEING; |
578 | 615 | inode_lru_list_del(inode); | |
579 | /* | 616 | spin_unlock(&inode->i_lock); |
580 | * Move the inode off the IO lists and LRU once I_FREEING is | 617 | list_add(&inode->i_lru, &dispose); |
581 | * set so that it won't get moved back on there if it is dirty. | ||
582 | */ | ||
583 | list_move(&inode->i_lru, &dispose); | ||
584 | list_del_init(&inode->i_wb_list); | ||
585 | if (!(inode->i_state & (I_DIRTY | I_SYNC))) | ||
586 | inodes_stat.nr_unused--; | ||
587 | } | 618 | } |
588 | spin_unlock(&inode_lock); | 619 | spin_unlock(&inode_sb_list_lock); |
589 | 620 | ||
590 | dispose_list(&dispose); | 621 | dispose_list(&dispose); |
591 | 622 | ||
@@ -607,7 +638,7 @@ static int can_unuse(struct inode *inode) | |||
607 | 638 | ||
608 | /* | 639 | /* |
609 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to a | 640 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to a |
610 | * temporary list and then are freed outside inode_lock by dispose_list(). | 641 | * temporary list and then are freed outside inode_lru_lock by dispose_list(). |
611 | * | 642 | * |
612 | * Any inodes which are pinned purely because of attached pagecache have their | 643 | * Any inodes which are pinned purely because of attached pagecache have their |
613 | * pagecache removed. If the inode has metadata buffers attached to | 644 | * pagecache removed. If the inode has metadata buffers attached to |
@@ -628,7 +659,7 @@ static void prune_icache(int nr_to_scan) | |||
628 | unsigned long reap = 0; | 659 | unsigned long reap = 0; |
629 | 660 | ||
630 | down_read(&iprune_sem); | 661 | down_read(&iprune_sem); |
631 | spin_lock(&inode_lock); | 662 | spin_lock(&inode_lru_lock); |
632 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { | 663 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { |
633 | struct inode *inode; | 664 | struct inode *inode; |
634 | 665 | ||
@@ -638,53 +669,67 @@ static void prune_icache(int nr_to_scan) | |||
638 | inode = list_entry(inode_lru.prev, struct inode, i_lru); | 669 | inode = list_entry(inode_lru.prev, struct inode, i_lru); |
639 | 670 | ||
640 | /* | 671 | /* |
672 | * we are inverting the inode_lru_lock/inode->i_lock here, | ||
673 | * so use a trylock. If we fail to get the lock, just move the | ||
674 | * inode to the back of the list so we don't spin on it. | ||
675 | */ | ||
676 | if (!spin_trylock(&inode->i_lock)) { | ||
677 | list_move(&inode->i_lru, &inode_lru); | ||
678 | continue; | ||
679 | } | ||
680 | |||
681 | /* | ||
641 | * Referenced or dirty inodes are still in use. Give them | 682 | * Referenced or dirty inodes are still in use. Give them |
642 | * another pass through the LRU as we cannot reclaim them now. | 683 | * another pass through the LRU as we cannot reclaim them now. |
643 | */ | 684 | */ |
644 | if (atomic_read(&inode->i_count) || | 685 | if (atomic_read(&inode->i_count) || |
645 | (inode->i_state & ~I_REFERENCED)) { | 686 | (inode->i_state & ~I_REFERENCED)) { |
646 | list_del_init(&inode->i_lru); | 687 | list_del_init(&inode->i_lru); |
688 | spin_unlock(&inode->i_lock); | ||
647 | inodes_stat.nr_unused--; | 689 | inodes_stat.nr_unused--; |
648 | continue; | 690 | continue; |
649 | } | 691 | } |
650 | 692 | ||
651 | /* recently referenced inodes get one more pass */ | 693 | /* recently referenced inodes get one more pass */ |
652 | if (inode->i_state & I_REFERENCED) { | 694 | if (inode->i_state & I_REFERENCED) { |
653 | list_move(&inode->i_lru, &inode_lru); | ||
654 | inode->i_state &= ~I_REFERENCED; | 695 | inode->i_state &= ~I_REFERENCED; |
696 | list_move(&inode->i_lru, &inode_lru); | ||
697 | spin_unlock(&inode->i_lock); | ||
655 | continue; | 698 | continue; |
656 | } | 699 | } |
657 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { | 700 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { |
658 | __iget(inode); | 701 | __iget(inode); |
659 | spin_unlock(&inode_lock); | 702 | spin_unlock(&inode->i_lock); |
703 | spin_unlock(&inode_lru_lock); | ||
660 | if (remove_inode_buffers(inode)) | 704 | if (remove_inode_buffers(inode)) |
661 | reap += invalidate_mapping_pages(&inode->i_data, | 705 | reap += invalidate_mapping_pages(&inode->i_data, |
662 | 0, -1); | 706 | 0, -1); |
663 | iput(inode); | 707 | iput(inode); |
664 | spin_lock(&inode_lock); | 708 | spin_lock(&inode_lru_lock); |
665 | 709 | ||
666 | if (inode != list_entry(inode_lru.next, | 710 | if (inode != list_entry(inode_lru.next, |
667 | struct inode, i_lru)) | 711 | struct inode, i_lru)) |
668 | continue; /* wrong inode or list_empty */ | 712 | continue; /* wrong inode or list_empty */ |
669 | if (!can_unuse(inode)) | 713 | /* avoid lock inversions with trylock */ |
714 | if (!spin_trylock(&inode->i_lock)) | ||
715 | continue; | ||
716 | if (!can_unuse(inode)) { | ||
717 | spin_unlock(&inode->i_lock); | ||
670 | continue; | 718 | continue; |
719 | } | ||
671 | } | 720 | } |
672 | WARN_ON(inode->i_state & I_NEW); | 721 | WARN_ON(inode->i_state & I_NEW); |
673 | inode->i_state |= I_FREEING; | 722 | inode->i_state |= I_FREEING; |
723 | spin_unlock(&inode->i_lock); | ||
674 | 724 | ||
675 | /* | ||
676 | * Move the inode off the IO lists and LRU once I_FREEING is | ||
677 | * set so that it won't get moved back on there if it is dirty. | ||
678 | */ | ||
679 | list_move(&inode->i_lru, &freeable); | 725 | list_move(&inode->i_lru, &freeable); |
680 | list_del_init(&inode->i_wb_list); | ||
681 | inodes_stat.nr_unused--; | 726 | inodes_stat.nr_unused--; |
682 | } | 727 | } |
683 | if (current_is_kswapd()) | 728 | if (current_is_kswapd()) |
684 | __count_vm_events(KSWAPD_INODESTEAL, reap); | 729 | __count_vm_events(KSWAPD_INODESTEAL, reap); |
685 | else | 730 | else |
686 | __count_vm_events(PGINODESTEAL, reap); | 731 | __count_vm_events(PGINODESTEAL, reap); |
687 | spin_unlock(&inode_lock); | 732 | spin_unlock(&inode_lru_lock); |
688 | 733 | ||
689 | dispose_list(&freeable); | 734 | dispose_list(&freeable); |
690 | up_read(&iprune_sem); | 735 | up_read(&iprune_sem); |
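prune_icache() walks the LRU under inode_lru_lock but needs each inode's i_lock, the reverse of the documented i_lock -> inode_lru_lock order, so it may only trylock and, on contention, rotates the inode to the back of the list rather than spinning (the diff's own comment spells this out). A runnable analogue:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t inode_lru_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;

    static void prune_one(void)
    {
        pthread_mutex_lock(&inode_lru_lock);
        /* Inverted acquisition: trylock instead of lock. */
        if (pthread_mutex_trylock(&i_lock) != 0) {
            /* Contended: move the inode to the list tail, retry later. */
            puts("busy, rotated to back of LRU");
        } else {
            puts("reclaiming inode");
            pthread_mutex_unlock(&i_lock);
        }
        pthread_mutex_unlock(&inode_lru_lock);
    }

    int main(void)
    {
        prune_one();                       /* uncontended: reclaims */
        pthread_mutex_lock(&i_lock);       /* simulate a busy inode */
        prune_one();                       /* trylock fails: rotate */
        pthread_mutex_unlock(&i_lock);
        return 0;
    }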
@@ -733,15 +778,21 @@ static struct inode *find_inode(struct super_block *sb, | |||
733 | 778 | ||
734 | repeat: | 779 | repeat: |
735 | hlist_for_each_entry(inode, node, head, i_hash) { | 780 | hlist_for_each_entry(inode, node, head, i_hash) { |
736 | if (inode->i_sb != sb) | 781 | spin_lock(&inode->i_lock); |
782 | if (inode->i_sb != sb) { | ||
783 | spin_unlock(&inode->i_lock); | ||
737 | continue; | 784 | continue; |
738 | if (!test(inode, data)) | 785 | } |
786 | if (!test(inode, data)) { | ||
787 | spin_unlock(&inode->i_lock); | ||
739 | continue; | 788 | continue; |
789 | } | ||
740 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) { | 790 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) { |
741 | __wait_on_freeing_inode(inode); | 791 | __wait_on_freeing_inode(inode); |
742 | goto repeat; | 792 | goto repeat; |
743 | } | 793 | } |
744 | __iget(inode); | 794 | __iget(inode); |
795 | spin_unlock(&inode->i_lock); | ||
745 | return inode; | 796 | return inode; |
746 | } | 797 | } |
747 | return NULL; | 798 | return NULL; |
@@ -759,15 +810,21 @@ static struct inode *find_inode_fast(struct super_block *sb, | |||
759 | 810 | ||
760 | repeat: | 811 | repeat: |
761 | hlist_for_each_entry(inode, node, head, i_hash) { | 812 | hlist_for_each_entry(inode, node, head, i_hash) { |
762 | if (inode->i_ino != ino) | 813 | spin_lock(&inode->i_lock); |
814 | if (inode->i_ino != ino) { | ||
815 | spin_unlock(&inode->i_lock); | ||
763 | continue; | 816 | continue; |
764 | if (inode->i_sb != sb) | 817 | } |
818 | if (inode->i_sb != sb) { | ||
819 | spin_unlock(&inode->i_lock); | ||
765 | continue; | 820 | continue; |
821 | } | ||
766 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) { | 822 | if (inode->i_state & (I_FREEING|I_WILL_FREE)) { |
767 | __wait_on_freeing_inode(inode); | 823 | __wait_on_freeing_inode(inode); |
768 | goto repeat; | 824 | goto repeat; |
769 | } | 825 | } |
770 | __iget(inode); | 826 | __iget(inode); |
827 | spin_unlock(&inode->i_lock); | ||
771 | return inode; | 828 | return inode; |
772 | } | 829 | } |
773 | return NULL; | 830 | return NULL; |
@@ -827,19 +884,26 @@ struct inode *new_inode(struct super_block *sb) | |||
827 | { | 884 | { |
828 | struct inode *inode; | 885 | struct inode *inode; |
829 | 886 | ||
830 | spin_lock_prefetch(&inode_lock); | 887 | spin_lock_prefetch(&inode_sb_list_lock); |
831 | 888 | ||
832 | inode = alloc_inode(sb); | 889 | inode = alloc_inode(sb); |
833 | if (inode) { | 890 | if (inode) { |
834 | spin_lock(&inode_lock); | 891 | spin_lock(&inode->i_lock); |
835 | __inode_sb_list_add(inode); | ||
836 | inode->i_state = 0; | 892 | inode->i_state = 0; |
837 | spin_unlock(&inode_lock); | 893 | spin_unlock(&inode->i_lock); |
894 | inode_sb_list_add(inode); | ||
838 | } | 895 | } |
839 | return inode; | 896 | return inode; |
840 | } | 897 | } |
841 | EXPORT_SYMBOL(new_inode); | 898 | EXPORT_SYMBOL(new_inode); |
842 | 899 | ||
900 | /** | ||
901 | * unlock_new_inode - clear the I_NEW state and wake up any waiters | ||
902 | * @inode: new inode to unlock | ||
903 | * | ||
904 | * Called when the inode is fully initialised to clear the new state of the | ||
905 | * inode and wake up anyone waiting for the inode to finish initialisation. | ||
906 | */ | ||
843 | void unlock_new_inode(struct inode *inode) | 907 | void unlock_new_inode(struct inode *inode) |
844 | { | 908 | { |
845 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 909 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
@@ -859,51 +923,67 @@ void unlock_new_inode(struct inode *inode) | |||
859 | } | 923 | } |
860 | } | 924 | } |
861 | #endif | 925 | #endif |
862 | /* | 926 | spin_lock(&inode->i_lock); |
863 | * This is special! We do not need the spinlock when clearing I_NEW, | ||
864 | * because we're guaranteed that nobody else tries to do anything about | ||
865 | * the state of the inode when it is locked, as we just created it (so | ||
866 | * there can be no old holders that haven't tested I_NEW). | ||
867 | * However we must emit the memory barrier so that other CPUs reliably | ||
868 | * see the clearing of I_NEW after the other inode initialisation has | ||
869 | * completed. | ||
870 | */ | ||
871 | smp_mb(); | ||
872 | WARN_ON(!(inode->i_state & I_NEW)); | 927 | WARN_ON(!(inode->i_state & I_NEW)); |
873 | inode->i_state &= ~I_NEW; | 928 | inode->i_state &= ~I_NEW; |
874 | wake_up_inode(inode); | 929 | wake_up_bit(&inode->i_state, __I_NEW); |
930 | spin_unlock(&inode->i_lock); | ||
875 | } | 931 | } |
876 | EXPORT_SYMBOL(unlock_new_inode); | 932 | EXPORT_SYMBOL(unlock_new_inode); |
877 | 933 | ||
878 | /* | 934 | /** |
879 | * This is called without the inode lock held.. Be careful. | 935 | * iget5_locked - obtain an inode from a mounted file system |
936 | * @sb: super block of file system | ||
937 | * @hashval: hash value (usually inode number) to get | ||
938 | * @test: callback used for comparisons between inodes | ||
939 | * @set: callback used to initialize a new struct inode | ||
940 | * @data: opaque data pointer to pass to @test and @set | ||
941 | * | ||
942 | * Search for the inode specified by @hashval and @data in the inode cache, | ||
943 | * and if present it is returned with an increased reference count. This is | ||
944 | * a generalized version of iget_locked() for file systems where the inode | ||
945 | * number is not sufficient for unique identification of an inode. | ||
880 | * | 946 | * |
881 | * We no longer cache the sb_flags in i_flags - see fs.h | 947 | * If the inode is not in cache, allocate a new inode and return it locked, |
882 | * -- rmk@arm.uk.linux.org | 948 | * hashed, and with the I_NEW flag set. The file system gets to fill it in |
949 | * before unlocking it via unlock_new_inode(). | ||
950 | * | ||
951 | * Note both @test and @set are called with the inode_hash_lock held, so can't | ||
952 | * sleep. | ||
883 | */ | 953 | */ |
884 | static struct inode *get_new_inode(struct super_block *sb, | 954 | struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, |
885 | struct hlist_head *head, | 955 | int (*test)(struct inode *, void *), |
886 | int (*test)(struct inode *, void *), | 956 | int (*set)(struct inode *, void *), void *data) |
887 | int (*set)(struct inode *, void *), | ||
888 | void *data) | ||
889 | { | 957 | { |
958 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); | ||
890 | struct inode *inode; | 959 | struct inode *inode; |
891 | 960 | ||
961 | spin_lock(&inode_hash_lock); | ||
962 | inode = find_inode(sb, head, test, data); | ||
963 | spin_unlock(&inode_hash_lock); | ||
964 | |||
965 | if (inode) { | ||
966 | wait_on_inode(inode); | ||
967 | return inode; | ||
968 | } | ||
969 | |||
892 | inode = alloc_inode(sb); | 970 | inode = alloc_inode(sb); |
893 | if (inode) { | 971 | if (inode) { |
894 | struct inode *old; | 972 | struct inode *old; |
895 | 973 | ||
896 | spin_lock(&inode_lock); | 974 | spin_lock(&inode_hash_lock); |
897 | /* We released the lock, so.. */ | 975 | /* We released the lock, so.. */ |
898 | old = find_inode(sb, head, test, data); | 976 | old = find_inode(sb, head, test, data); |
899 | if (!old) { | 977 | if (!old) { |
900 | if (set(inode, data)) | 978 | if (set(inode, data)) |
901 | goto set_failed; | 979 | goto set_failed; |
902 | 980 | ||
903 | hlist_add_head(&inode->i_hash, head); | 981 | spin_lock(&inode->i_lock); |
904 | __inode_sb_list_add(inode); | ||
905 | inode->i_state = I_NEW; | 982 | inode->i_state = I_NEW; |
906 | spin_unlock(&inode_lock); | 983 | hlist_add_head(&inode->i_hash, head); |
984 | spin_unlock(&inode->i_lock); | ||
985 | inode_sb_list_add(inode); | ||
986 | spin_unlock(&inode_hash_lock); | ||
907 | 987 | ||
908 | /* Return the locked inode with I_NEW set, the | 988 | /* Return the locked inode with I_NEW set, the |
909 | * caller is responsible for filling in the contents | 989 | * caller is responsible for filling in the contents |
@@ -916,7 +996,7 @@ static struct inode *get_new_inode(struct super_block *sb, | |||
916 | * us. Use the old inode instead of the one we just | 996 | * us. Use the old inode instead of the one we just |
917 | * allocated. | 997 | * allocated. |
918 | */ | 998 | */ |
919 | spin_unlock(&inode_lock); | 999 | spin_unlock(&inode_hash_lock); |
920 | destroy_inode(inode); | 1000 | destroy_inode(inode); |
921 | inode = old; | 1001 | inode = old; |
922 | wait_on_inode(inode); | 1002 | wait_on_inode(inode); |
@@ -924,33 +1004,53 @@ static struct inode *get_new_inode(struct super_block *sb, | |||
924 | return inode; | 1004 | return inode; |
925 | 1005 | ||
926 | set_failed: | 1006 | set_failed: |
927 | spin_unlock(&inode_lock); | 1007 | spin_unlock(&inode_hash_lock); |
928 | destroy_inode(inode); | 1008 | destroy_inode(inode); |
929 | return NULL; | 1009 | return NULL; |
930 | } | 1010 | } |
1011 | EXPORT_SYMBOL(iget5_locked); | ||
931 | 1012 | ||
932 | /* | 1013 | /** |
933 | * get_new_inode_fast is the fast path version of get_new_inode, see the | 1014 | * iget_locked - obtain an inode from a mounted file system |
934 | * comment at iget_locked for details. | 1015 | * @sb: super block of file system |
1016 | * @ino: inode number to get | ||
1017 | * | ||
1018 | * Search for the inode specified by @ino in the inode cache and if present | ||
1019 | * return it with an increased reference count. This is for file systems | ||
1020 | * where the inode number is sufficient for unique identification of an inode. | ||
1021 | * | ||
1022 | * If the inode is not in cache, allocate a new inode and return it locked, | ||
1023 | * hashed, and with the I_NEW flag set. The file system gets to fill it in | ||
1024 | * before unlocking it via unlock_new_inode(). | ||
935 | */ | 1025 | */ |
936 | static struct inode *get_new_inode_fast(struct super_block *sb, | 1026 | struct inode *iget_locked(struct super_block *sb, unsigned long ino) |
937 | struct hlist_head *head, unsigned long ino) | ||
938 | { | 1027 | { |
1028 | struct hlist_head *head = inode_hashtable + hash(sb, ino); | ||
939 | struct inode *inode; | 1029 | struct inode *inode; |
940 | 1030 | ||
1031 | spin_lock(&inode_hash_lock); | ||
1032 | inode = find_inode_fast(sb, head, ino); | ||
1033 | spin_unlock(&inode_hash_lock); | ||
1034 | if (inode) { | ||
1035 | wait_on_inode(inode); | ||
1036 | return inode; | ||
1037 | } | ||
1038 | |||
941 | inode = alloc_inode(sb); | 1039 | inode = alloc_inode(sb); |
942 | if (inode) { | 1040 | if (inode) { |
943 | struct inode *old; | 1041 | struct inode *old; |
944 | 1042 | ||
945 | spin_lock(&inode_lock); | 1043 | spin_lock(&inode_hash_lock); |
946 | /* We released the lock, so.. */ | 1044 | /* We released the lock, so.. */ |
947 | old = find_inode_fast(sb, head, ino); | 1045 | old = find_inode_fast(sb, head, ino); |
948 | if (!old) { | 1046 | if (!old) { |
949 | inode->i_ino = ino; | 1047 | inode->i_ino = ino; |
950 | hlist_add_head(&inode->i_hash, head); | 1048 | spin_lock(&inode->i_lock); |
951 | __inode_sb_list_add(inode); | ||
952 | inode->i_state = I_NEW; | 1049 | inode->i_state = I_NEW; |
953 | spin_unlock(&inode_lock); | 1050 | hlist_add_head(&inode->i_hash, head); |
1051 | spin_unlock(&inode->i_lock); | ||
1052 | inode_sb_list_add(inode); | ||
1053 | spin_unlock(&inode_hash_lock); | ||
954 | 1054 | ||
955 | /* Return the locked inode with I_NEW set, the | 1055 | /* Return the locked inode with I_NEW set, the |
956 | * caller is responsible for filling in the contents | 1056 | * caller is responsible for filling in the contents |
@@ -963,13 +1063,14 @@ static struct inode *get_new_inode_fast(struct super_block *sb, | |||
963 | * us. Use the old inode instead of the one we just | 1063 | * us. Use the old inode instead of the one we just |
964 | * allocated. | 1064 | * allocated. |
965 | */ | 1065 | */ |
966 | spin_unlock(&inode_lock); | 1066 | spin_unlock(&inode_hash_lock); |
967 | destroy_inode(inode); | 1067 | destroy_inode(inode); |
968 | inode = old; | 1068 | inode = old; |
969 | wait_on_inode(inode); | 1069 | wait_on_inode(inode); |
970 | } | 1070 | } |
971 | return inode; | 1071 | return inode; |
972 | } | 1072 | } |
1073 | EXPORT_SYMBOL(iget_locked); | ||
973 | 1074 | ||
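As a usage sketch of the pattern the new kernel-doc above describes: a filesystem calls iget_locked(), checks I_NEW to distinguish a cache hit from a freshly allocated inode, and fills in the latter before unlocking it. The foofs_* names and the disk-read helper are hypothetical, not part of this patch.

	static struct inode *foofs_iget(struct super_block *sb, unsigned long ino)
	{
		struct inode *inode = iget_locked(sb, ino);

		if (!inode)
			return ERR_PTR(-ENOMEM);
		if (!(inode->i_state & I_NEW))
			return inode;	/* cache hit: already initialised */

		/* Cache miss: fill the inode before anyone else can see it. */
		if (foofs_read_inode_from_disk(inode) < 0) {	/* hypothetical */
			iget_failed(inode);	/* unhashes, marks bad, drops */
			return ERR_PTR(-EIO);
		}
		unlock_new_inode(inode);	/* clears I_NEW, wakes waiters */
		return inode;
	}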
974 | /* | 1075 | /* |
975 | * search the inode cache for a matching inode number. | 1076 | * search the inode cache for a matching inode number. |
@@ -984,10 +1085,14 @@ static int test_inode_iunique(struct super_block *sb, unsigned long ino) | |||
984 | struct hlist_node *node; | 1085 | struct hlist_node *node; |
985 | struct inode *inode; | 1086 | struct inode *inode; |
986 | 1087 | ||
1088 | spin_lock(&inode_hash_lock); | ||
987 | hlist_for_each_entry(inode, node, b, i_hash) { | 1089 | hlist_for_each_entry(inode, node, b, i_hash) { |
988 | if (inode->i_ino == ino && inode->i_sb == sb) | 1090 | if (inode->i_ino == ino && inode->i_sb == sb) { |
1091 | spin_unlock(&inode_hash_lock); | ||
989 | return 0; | 1092 | return 0; |
1093 | } | ||
990 | } | 1094 | } |
1095 | spin_unlock(&inode_hash_lock); | ||
991 | 1096 | ||
992 | return 1; | 1097 | return 1; |
993 | } | 1098 | } |
@@ -1017,7 +1122,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) | |||
1017 | static unsigned int counter; | 1122 | static unsigned int counter; |
1018 | ino_t res; | 1123 | ino_t res; |
1019 | 1124 | ||
1020 | spin_lock(&inode_lock); | ||
1021 | spin_lock(&iunique_lock); | 1125 | spin_lock(&iunique_lock); |
1022 | do { | 1126 | do { |
1023 | if (counter <= max_reserved) | 1127 | if (counter <= max_reserved) |
@@ -1025,7 +1129,6 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) | |||
1025 | res = counter++; | 1129 | res = counter++; |
1026 | } while (!test_inode_iunique(sb, res)); | 1130 | } while (!test_inode_iunique(sb, res)); |
1027 | spin_unlock(&iunique_lock); | 1131 | spin_unlock(&iunique_lock); |
1028 | spin_unlock(&inode_lock); | ||
1029 | 1132 | ||
1030 | return res; | 1133 | return res; |
1031 | } | 1134 | } |
@@ -1033,116 +1136,50 @@ EXPORT_SYMBOL(iunique); | |||
1033 | 1136 | ||
1034 | struct inode *igrab(struct inode *inode) | 1137 | struct inode *igrab(struct inode *inode) |
1035 | { | 1138 | { |
1036 | spin_lock(&inode_lock); | 1139 | spin_lock(&inode->i_lock); |
1037 | if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) | 1140 | if (!(inode->i_state & (I_FREEING|I_WILL_FREE))) { |
1038 | __iget(inode); | 1141 | __iget(inode); |
1039 | else | 1142 | spin_unlock(&inode->i_lock); |
1143 | } else { | ||
1144 | spin_unlock(&inode->i_lock); | ||
1040 | /* | 1145 | /* |
1041 | * Handle the case where s_op->clear_inode has not been | 1146 | * Handle the case where s_op->clear_inode has not been |
1042 | * called yet, and somebody is calling igrab | 1147 | * called yet, and somebody is calling igrab |
1043 | * while the inode is getting freed. | 1148 | * while the inode is getting freed. |
1044 | */ | 1149 | */ |
1045 | inode = NULL; | 1150 | inode = NULL; |
1046 | spin_unlock(&inode_lock); | 1151 | } |
1047 | return inode; | 1152 | return inode; |
1048 | } | 1153 | } |
1049 | EXPORT_SYMBOL(igrab); | 1154 | EXPORT_SYMBOL(igrab); |
1050 | 1155 | ||
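Caller-side contract for the reworked igrab(), as a short sketch: the check-and-grab now happens entirely under i_lock, and a NULL return means the inode is already being torn down, so it must be treated as gone. The helper below is hypothetical:

	static int foofs_pin_inode(struct inode *candidate)
	{
		struct inode *inode = igrab(candidate);

		if (!inode)	/* candidate was I_FREEING or I_WILL_FREE */
			return -ESTALE;
		/* ... inode is pinned; safe to use across sleeps ... */
		iput(inode);
		return 0;
	}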
1051 | /** | 1156 | /** |
1052 | * ifind - internal function, you want ilookup5() or iget5(). | ||
1053 | * @sb: super block of file system to search | ||
1054 | * @head: the head of the list to search | ||
1055 | * @test: callback used for comparisons between inodes | ||
1056 | * @data: opaque data pointer to pass to @test | ||
1057 | * @wait: if true wait for the inode to be unlocked, if false do not | ||
1058 | * | ||
1059 | * ifind() searches for the inode specified by @data in the inode | ||
1060 | * cache. This is a generalized version of ifind_fast() for file systems where | ||
1061 | * the inode number is not sufficient for unique identification of an inode. | ||
1062 | * | ||
1063 | * If the inode is in the cache, the inode is returned with an incremented | ||
1064 | * reference count. | ||
1065 | * | ||
1066 | * Otherwise NULL is returned. | ||
1067 | * | ||
1068 | * Note, @test is called with the inode_lock held, so can't sleep. | ||
1069 | */ | ||
1070 | static struct inode *ifind(struct super_block *sb, | ||
1071 | struct hlist_head *head, int (*test)(struct inode *, void *), | ||
1072 | void *data, const int wait) | ||
1073 | { | ||
1074 | struct inode *inode; | ||
1075 | |||
1076 | spin_lock(&inode_lock); | ||
1077 | inode = find_inode(sb, head, test, data); | ||
1078 | if (inode) { | ||
1079 | spin_unlock(&inode_lock); | ||
1080 | if (likely(wait)) | ||
1081 | wait_on_inode(inode); | ||
1082 | return inode; | ||
1083 | } | ||
1084 | spin_unlock(&inode_lock); | ||
1085 | return NULL; | ||
1086 | } | ||
1087 | |||
1088 | /** | ||
1089 | * ifind_fast - internal function, you want ilookup() or iget(). | ||
1090 | * @sb: super block of file system to search | ||
1091 | * @head: head of the list to search | ||
1092 | * @ino: inode number to search for | ||
1093 | * | ||
1094 | * ifind_fast() searches for the inode @ino in the inode cache. This is for | ||
1095 | * file systems where the inode number is sufficient for unique identification | ||
1096 | * of an inode. | ||
1097 | * | ||
1098 | * If the inode is in the cache, the inode is returned with an incremented | ||
1099 | * reference count. | ||
1100 | * | ||
1101 | * Otherwise NULL is returned. | ||
1102 | */ | ||
1103 | static struct inode *ifind_fast(struct super_block *sb, | ||
1104 | struct hlist_head *head, unsigned long ino) | ||
1105 | { | ||
1106 | struct inode *inode; | ||
1107 | |||
1108 | spin_lock(&inode_lock); | ||
1109 | inode = find_inode_fast(sb, head, ino); | ||
1110 | if (inode) { | ||
1111 | spin_unlock(&inode_lock); | ||
1112 | wait_on_inode(inode); | ||
1113 | return inode; | ||
1114 | } | ||
1115 | spin_unlock(&inode_lock); | ||
1116 | return NULL; | ||
1117 | } | ||
1118 | |||
1119 | /** | ||
1120 | * ilookup5_nowait - search for an inode in the inode cache | 1157 | * ilookup5_nowait - search for an inode in the inode cache |
1121 | * @sb: super block of file system to search | 1158 | * @sb: super block of file system to search |
1122 | * @hashval: hash value (usually inode number) to search for | 1159 | * @hashval: hash value (usually inode number) to search for |
1123 | * @test: callback used for comparisons between inodes | 1160 | * @test: callback used for comparisons between inodes |
1124 | * @data: opaque data pointer to pass to @test | 1161 | * @data: opaque data pointer to pass to @test |
1125 | * | 1162 | * |
1126 | * ilookup5() uses ifind() to search for the inode specified by @hashval and | 1163 | * Search for the inode specified by @hashval and @data in the inode cache. |
1127 | * @data in the inode cache. This is a generalized version of ilookup() for | ||
1128 | * file systems where the inode number is not sufficient for unique | ||
1129 | * identification of an inode. | ||
1130 | * | ||
1131 | * If the inode is in the cache, the inode is returned with an incremented | 1164 | * If the inode is in the cache, the inode is returned with an incremented |
1132 | * reference count. Note, the inode lock is not waited upon so you have to be | 1165 | * reference count. |
1133 | * very careful what you do with the returned inode. You probably should be | ||
1134 | * using ilookup5() instead. | ||
1135 | * | 1166 | * |
1136 | * Otherwise NULL is returned. | 1167 | * Note: I_NEW is not waited upon so you have to be very careful what you do |
1168 | * with the returned inode. You probably should be using ilookup5() instead. | ||
1137 | * | 1169 | * |
1138 | * Note, @test is called with the inode_lock held, so can't sleep. | 1170 | * Note2: @test is called with the inode_hash_lock held, so can't sleep. |
1139 | */ | 1171 | */ |
1140 | struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, | 1172 | struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, |
1141 | int (*test)(struct inode *, void *), void *data) | 1173 | int (*test)(struct inode *, void *), void *data) |
1142 | { | 1174 | { |
1143 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); | 1175 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); |
1176 | struct inode *inode; | ||
1177 | |||
1178 | spin_lock(&inode_hash_lock); | ||
1179 | inode = find_inode(sb, head, test, data); | ||
1180 | spin_unlock(&inode_hash_lock); | ||
1144 | 1181 | ||
1145 | return ifind(sb, head, test, data, 0); | 1182 | return inode; |
1146 | } | 1183 | } |
1147 | EXPORT_SYMBOL(ilookup5_nowait); | 1184 | EXPORT_SYMBOL(ilookup5_nowait); |
1148 | 1185 | ||
@@ -1153,24 +1190,24 @@ EXPORT_SYMBOL(ilookup5_nowait); | |||
1153 | * @test: callback used for comparisons between inodes | 1190 | * @test: callback used for comparisons between inodes |
1154 | * @data: opaque data pointer to pass to @test | 1191 | * @data: opaque data pointer to pass to @test |
1155 | * | 1192 | * |
1156 | * ilookup5() uses ifind() to search for the inode specified by @hashval and | 1193 | * Search for the inode specified by @hashval and @data in the inode cache, |
1157 | * @data in the inode cache. This is a generalized version of ilookup() for | 1194 | * and if the inode is in the cache, return the inode with an incremented |
1158 | * file systems where the inode number is not sufficient for unique | 1195 | * reference count. Waits on I_NEW before returning the inode. |
1159 | * identification of an inode. | ||
1160 | * | ||
1161 | * If the inode is in the cache, the inode lock is waited upon and the inode is | ||
1162 | returned with an incremented reference count. | ||
1163 | * | 1197 | * |
1164 | * Otherwise NULL is returned. | 1198 | * This is a generalized version of ilookup() for file systems where the |
1199 | * inode number is not sufficient for unique identification of an inode. | ||
1165 | * | 1200 | * |
1166 | * Note, @test is called with the inode_lock held, so can't sleep. | 1201 | * Note: @test is called with the inode_hash_lock held, so can't sleep. |
1167 | */ | 1202 | */ |
1168 | struct inode *ilookup5(struct super_block *sb, unsigned long hashval, | 1203 | struct inode *ilookup5(struct super_block *sb, unsigned long hashval, |
1169 | int (*test)(struct inode *, void *), void *data) | 1204 | int (*test)(struct inode *, void *), void *data) |
1170 | { | 1205 | { |
1171 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); | 1206 | struct inode *inode = ilookup5_nowait(sb, hashval, test, data); |
1172 | 1207 | ||
1173 | return ifind(sb, head, test, data, 1); | 1208 | if (inode) |
1209 | wait_on_inode(inode); | ||
1210 | return inode; | ||
1174 | } | 1211 | } |
1175 | EXPORT_SYMBOL(ilookup5); | 1212 | EXPORT_SYMBOL(ilookup5); |
1176 | 1213 | ||
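To illustrate the @test contract spelled out in the updated kernel-doc (it runs under inode_hash_lock, so it must not sleep), here is a hypothetical filesystem that keys its inodes on a 64-bit object id; the barfs_* names are illustrative only:

	struct barfs_inode_info {
		u64			object_id;
		struct inode		vfs_inode;
	};

	static int barfs_test(struct inode *inode, void *data)
	{
		u64 *id = data;

		/* Called with inode_hash_lock held: no sleeping allowed. */
		return container_of(inode, struct barfs_inode_info,
				    vfs_inode)->object_id == *id;
	}

	static struct inode *barfs_lookup_cached(struct super_block *sb, u64 id)
	{
		return ilookup5(sb, (unsigned long)id, barfs_test, &id);
	}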
@@ -1179,91 +1216,23 @@ EXPORT_SYMBOL(ilookup5); | |||
1179 | * @sb: super block of file system to search | 1216 | * @sb: super block of file system to search |
1180 | * @ino: inode number to search for | 1217 | * @ino: inode number to search for |
1181 | * | 1218 | * |
1182 | * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache. | 1219 | * Search for the inode @ino in the inode cache, and if the inode is in the |
1183 | * This is for file systems where the inode number is sufficient for unique | 1220 | * cache, the inode is returned with an incremented reference count. |
1184 | * identification of an inode. | ||
1185 | * | ||
1186 | * If the inode is in the cache, the inode is returned with an incremented | ||
1187 | * reference count. | ||
1188 | * | ||
1189 | * Otherwise NULL is returned. | ||
1190 | */ | 1221 | */ |
1191 | struct inode *ilookup(struct super_block *sb, unsigned long ino) | 1222 | struct inode *ilookup(struct super_block *sb, unsigned long ino) |
1192 | { | 1223 | { |
1193 | struct hlist_head *head = inode_hashtable + hash(sb, ino); | 1224 | struct hlist_head *head = inode_hashtable + hash(sb, ino); |
1194 | |||
1195 | return ifind_fast(sb, head, ino); | ||
1196 | } | ||
1197 | EXPORT_SYMBOL(ilookup); | ||
1198 | |||
1199 | /** | ||
1200 | * iget5_locked - obtain an inode from a mounted file system | ||
1201 | * @sb: super block of file system | ||
1202 | * @hashval: hash value (usually inode number) to get | ||
1203 | * @test: callback used for comparisons between inodes | ||
1204 | * @set: callback used to initialize a new struct inode | ||
1205 | * @data: opaque data pointer to pass to @test and @set | ||
1206 | * | ||
1207 | * iget5_locked() uses ifind() to search for the inode specified by @hashval | ||
1208 | * and @data in the inode cache and if present it is returned with an increased | ||
1209 | * reference count. This is a generalized version of iget_locked() for file | ||
1210 | * systems where the inode number is not sufficient for unique identification | ||
1211 | * of an inode. | ||
1212 | * | ||
1213 | * If the inode is not in cache, get_new_inode() is called to allocate a new | ||
1214 | * inode and this is returned locked, hashed, and with the I_NEW flag set. The | ||
1215 | * file system gets to fill it in before unlocking it via unlock_new_inode(). | ||
1216 | * | ||
1217 | * Note both @test and @set are called with the inode_lock held, so can't sleep. | ||
1218 | */ | ||
1219 | struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, | ||
1220 | int (*test)(struct inode *, void *), | ||
1221 | int (*set)(struct inode *, void *), void *data) | ||
1222 | { | ||
1223 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); | ||
1224 | struct inode *inode; | 1225 | struct inode *inode; |
1225 | 1226 | ||
1226 | inode = ifind(sb, head, test, data, 1); | 1227 | spin_lock(&inode_hash_lock); |
1227 | if (inode) | 1228 | inode = find_inode_fast(sb, head, ino); |
1228 | return inode; | 1229 | spin_unlock(&inode_hash_lock); |
1229 | /* | ||
1230 | * get_new_inode() will do the right thing, re-trying the search | ||
1231 | * in case it had to block at any point. | ||
1232 | */ | ||
1233 | return get_new_inode(sb, head, test, set, data); | ||
1234 | } | ||
1235 | EXPORT_SYMBOL(iget5_locked); | ||
1236 | |||
1237 | /** | ||
1238 | * iget_locked - obtain an inode from a mounted file system | ||
1239 | * @sb: super block of file system | ||
1240 | * @ino: inode number to get | ||
1241 | * | ||
1242 | * iget_locked() uses ifind_fast() to search for the inode specified by @ino in | ||
1243 | * the inode cache and if present it is returned with an increased reference | ||
1244 | * count. This is for file systems where the inode number is sufficient for | ||
1245 | * unique identification of an inode. | ||
1246 | * | ||
1247 | * If the inode is not in cache, get_new_inode_fast() is called to allocate a | ||
1248 | * new inode and this is returned locked, hashed, and with the I_NEW flag set. | ||
1249 | * The file system gets to fill it in before unlocking it via | ||
1250 | * unlock_new_inode(). | ||
1251 | */ | ||
1252 | struct inode *iget_locked(struct super_block *sb, unsigned long ino) | ||
1253 | { | ||
1254 | struct hlist_head *head = inode_hashtable + hash(sb, ino); | ||
1255 | struct inode *inode; | ||
1256 | 1230 | ||
1257 | inode = ifind_fast(sb, head, ino); | ||
1258 | if (inode) | 1231 | if (inode) |
1259 | return inode; | 1232 | wait_on_inode(inode); |
1260 | /* | 1233 | return inode; |
1261 | * get_new_inode_fast() will do the right thing, re-trying the search | ||
1262 | * in case it had to block at any point. | ||
1263 | */ | ||
1264 | return get_new_inode_fast(sb, head, ino); | ||
1265 | } | 1234 | } |
1266 | EXPORT_SYMBOL(iget_locked); | 1235 | EXPORT_SYMBOL(ilookup); |
1267 | 1236 | ||
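A small sketch of where ilookup() fits relative to iget_locked(): it never allocates, so NULL simply means "not in cache" and is not an error. The helper below is hypothetical:

	static void foofs_drop_cached_pages(struct super_block *sb,
					    unsigned long ino)
	{
		struct inode *inode = ilookup(sb, ino);	/* cache-only probe */

		if (!inode)
			return;		/* not cached: nothing to do */
		invalidate_mapping_pages(inode->i_mapping, 0, -1);
		iput(inode);
	}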
1268 | int insert_inode_locked(struct inode *inode) | 1237 | int insert_inode_locked(struct inode *inode) |
1269 | { | 1238 | { |
@@ -1271,27 +1240,33 @@ int insert_inode_locked(struct inode *inode) | |||
1271 | ino_t ino = inode->i_ino; | 1240 | ino_t ino = inode->i_ino; |
1272 | struct hlist_head *head = inode_hashtable + hash(sb, ino); | 1241 | struct hlist_head *head = inode_hashtable + hash(sb, ino); |
1273 | 1242 | ||
1274 | inode->i_state |= I_NEW; | ||
1275 | while (1) { | 1243 | while (1) { |
1276 | struct hlist_node *node; | 1244 | struct hlist_node *node; |
1277 | struct inode *old = NULL; | 1245 | struct inode *old = NULL; |
1278 | spin_lock(&inode_lock); | 1246 | spin_lock(&inode_hash_lock); |
1279 | hlist_for_each_entry(old, node, head, i_hash) { | 1247 | hlist_for_each_entry(old, node, head, i_hash) { |
1280 | if (old->i_ino != ino) | 1248 | if (old->i_ino != ino) |
1281 | continue; | 1249 | continue; |
1282 | if (old->i_sb != sb) | 1250 | if (old->i_sb != sb) |
1283 | continue; | 1251 | continue; |
1284 | if (old->i_state & (I_FREEING|I_WILL_FREE)) | 1252 | spin_lock(&old->i_lock); |
1253 | if (old->i_state & (I_FREEING|I_WILL_FREE)) { | ||
1254 | spin_unlock(&old->i_lock); | ||
1285 | continue; | 1255 | continue; |
1256 | } | ||
1286 | break; | 1257 | break; |
1287 | } | 1258 | } |
1288 | if (likely(!node)) { | 1259 | if (likely(!node)) { |
1260 | spin_lock(&inode->i_lock); | ||
1261 | inode->i_state |= I_NEW; | ||
1289 | hlist_add_head(&inode->i_hash, head); | 1262 | hlist_add_head(&inode->i_hash, head); |
1290 | spin_unlock(&inode_lock); | 1263 | spin_unlock(&inode->i_lock); |
1264 | spin_unlock(&inode_hash_lock); | ||
1291 | return 0; | 1265 | return 0; |
1292 | } | 1266 | } |
1293 | __iget(old); | 1267 | __iget(old); |
1294 | spin_unlock(&inode_lock); | 1268 | spin_unlock(&old->i_lock); |
1269 | spin_unlock(&inode_hash_lock); | ||
1295 | wait_on_inode(old); | 1270 | wait_on_inode(old); |
1296 | if (unlikely(!inode_unhashed(old))) { | 1271 | if (unlikely(!inode_unhashed(old))) { |
1297 | iput(old); | 1272 | iput(old); |
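For reference, the lock nesting that the retry loop above (and the other hash-insert paths in this patch) relies on, distilled into one sketch rather than quoted from any single hunk:

	/*
	 * inode_hash_lock		global; protects the hash chains
	 *   inode->i_lock		per-inode; protects i_state and that
	 *				inode's hash membership
	 *
	 * i_lock nests inside inode_hash_lock, never the reverse, and an
	 * inode is only hashed with I_NEW set while both are held, so a
	 * concurrent lookup can never observe a half-initialised inode.
	 */
	spin_lock(&inode_hash_lock);
	spin_lock(&inode->i_lock);
	inode->i_state |= I_NEW;
	hlist_add_head(&inode->i_hash, head);
	spin_unlock(&inode->i_lock);
	spin_unlock(&inode_hash_lock);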
@@ -1308,29 +1283,34 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, | |||
1308 | struct super_block *sb = inode->i_sb; | 1283 | struct super_block *sb = inode->i_sb; |
1309 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); | 1284 | struct hlist_head *head = inode_hashtable + hash(sb, hashval); |
1310 | 1285 | ||
1311 | inode->i_state |= I_NEW; | ||
1312 | |||
1313 | while (1) { | 1286 | while (1) { |
1314 | struct hlist_node *node; | 1287 | struct hlist_node *node; |
1315 | struct inode *old = NULL; | 1288 | struct inode *old = NULL; |
1316 | 1289 | ||
1317 | spin_lock(&inode_lock); | 1290 | spin_lock(&inode_hash_lock); |
1318 | hlist_for_each_entry(old, node, head, i_hash) { | 1291 | hlist_for_each_entry(old, node, head, i_hash) { |
1319 | if (old->i_sb != sb) | 1292 | if (old->i_sb != sb) |
1320 | continue; | 1293 | continue; |
1321 | if (!test(old, data)) | 1294 | if (!test(old, data)) |
1322 | continue; | 1295 | continue; |
1323 | if (old->i_state & (I_FREEING|I_WILL_FREE)) | 1296 | spin_lock(&old->i_lock); |
1297 | if (old->i_state & (I_FREEING|I_WILL_FREE)) { | ||
1298 | spin_unlock(&old->i_lock); | ||
1324 | continue; | 1299 | continue; |
1300 | } | ||
1325 | break; | 1301 | break; |
1326 | } | 1302 | } |
1327 | if (likely(!node)) { | 1303 | if (likely(!node)) { |
1304 | spin_lock(&inode->i_lock); | ||
1305 | inode->i_state |= I_NEW; | ||
1328 | hlist_add_head(&inode->i_hash, head); | 1306 | hlist_add_head(&inode->i_hash, head); |
1329 | spin_unlock(&inode_lock); | 1307 | spin_unlock(&inode->i_lock); |
1308 | spin_unlock(&inode_hash_lock); | ||
1330 | return 0; | 1309 | return 0; |
1331 | } | 1310 | } |
1332 | __iget(old); | 1311 | __iget(old); |
1333 | spin_unlock(&inode_lock); | 1312 | spin_unlock(&old->i_lock); |
1313 | spin_unlock(&inode_hash_lock); | ||
1334 | wait_on_inode(old); | 1314 | wait_on_inode(old); |
1335 | if (unlikely(!inode_unhashed(old))) { | 1315 | if (unlikely(!inode_unhashed(old))) { |
1336 | iput(old); | 1316 | iput(old); |
@@ -1375,47 +1355,35 @@ static void iput_final(struct inode *inode) | |||
1375 | const struct super_operations *op = inode->i_sb->s_op; | 1355 | const struct super_operations *op = inode->i_sb->s_op; |
1376 | int drop; | 1356 | int drop; |
1377 | 1357 | ||
1358 | WARN_ON(inode->i_state & I_NEW); | ||
1359 | |||
1378 | if (op && op->drop_inode) | 1360 | if (op && op->drop_inode) |
1379 | drop = op->drop_inode(inode); | 1361 | drop = op->drop_inode(inode); |
1380 | else | 1362 | else |
1381 | drop = generic_drop_inode(inode); | 1363 | drop = generic_drop_inode(inode); |
1382 | 1364 | ||
1365 | if (!drop && (sb->s_flags & MS_ACTIVE)) { | ||
1366 | inode->i_state |= I_REFERENCED; | ||
1367 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | ||
1368 | inode_lru_list_add(inode); | ||
1369 | spin_unlock(&inode->i_lock); | ||
1370 | return; | ||
1371 | } | ||
1372 | |||
1383 | if (!drop) { | 1373 | if (!drop) { |
1384 | if (sb->s_flags & MS_ACTIVE) { | ||
1385 | inode->i_state |= I_REFERENCED; | ||
1386 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) { | ||
1387 | inode_lru_list_add(inode); | ||
1388 | } | ||
1389 | spin_unlock(&inode_lock); | ||
1390 | return; | ||
1391 | } | ||
1392 | WARN_ON(inode->i_state & I_NEW); | ||
1393 | inode->i_state |= I_WILL_FREE; | 1374 | inode->i_state |= I_WILL_FREE; |
1394 | spin_unlock(&inode_lock); | 1375 | spin_unlock(&inode->i_lock); |
1395 | write_inode_now(inode, 1); | 1376 | write_inode_now(inode, 1); |
1396 | spin_lock(&inode_lock); | 1377 | spin_lock(&inode->i_lock); |
1397 | WARN_ON(inode->i_state & I_NEW); | 1378 | WARN_ON(inode->i_state & I_NEW); |
1398 | inode->i_state &= ~I_WILL_FREE; | 1379 | inode->i_state &= ~I_WILL_FREE; |
1399 | __remove_inode_hash(inode); | ||
1400 | } | 1380 | } |
1401 | 1381 | ||
1402 | WARN_ON(inode->i_state & I_NEW); | ||
1403 | inode->i_state |= I_FREEING; | 1382 | inode->i_state |= I_FREEING; |
1404 | |||
1405 | /* | ||
1406 | * Move the inode off the IO lists and LRU once I_FREEING is | ||
1407 | * set so that it won't get moved back on there if it is dirty. | ||
1408 | */ | ||
1409 | inode_lru_list_del(inode); | 1383 | inode_lru_list_del(inode); |
1410 | list_del_init(&inode->i_wb_list); | 1384 | spin_unlock(&inode->i_lock); |
1411 | 1385 | ||
1412 | __inode_sb_list_del(inode); | ||
1413 | spin_unlock(&inode_lock); | ||
1414 | evict(inode); | 1386 | evict(inode); |
1415 | remove_inode_hash(inode); | ||
1416 | wake_up_inode(inode); | ||
1417 | BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); | ||
1418 | destroy_inode(inode); | ||
1419 | } | 1387 | } |
1420 | 1388 | ||
1421 | /** | 1389 | /** |
@@ -1432,7 +1400,7 @@ void iput(struct inode *inode) | |||
1432 | if (inode) { | 1400 | if (inode) { |
1433 | BUG_ON(inode->i_state & I_CLEAR); | 1401 | BUG_ON(inode->i_state & I_CLEAR); |
1434 | 1402 | ||
1435 | if (atomic_dec_and_lock(&inode->i_count, &inode_lock)) | 1403 | if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) |
1436 | iput_final(inode); | 1404 | iput_final(inode); |
1437 | } | 1405 | } |
1438 | } | 1406 | } |
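The atomic_dec_and_lock() switch above is the subtle part of the new iput(): the 1->0 transition and the acquisition of i_lock happen as one step, so iput_final() always runs with i_lock held. A rough open-coded equivalent, for illustration only:

	if (atomic_add_unless(&inode->i_count, -1, 1))
		return;			/* count was > 1: lock-free fast path */
	spin_lock(&inode->i_lock);
	if (atomic_dec_and_test(&inode->i_count))
		iput_final(inode);	/* enters with i_lock held, drops it */
	else
		spin_unlock(&inode->i_lock);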
@@ -1611,9 +1579,8 @@ EXPORT_SYMBOL(inode_wait); | |||
1611 | * to recheck inode state. | 1579 | * to recheck inode state. |
1612 | * | 1580 | * |
1613 | * It doesn't matter if I_NEW is not set initially, a call to | 1581 | * It doesn't matter if I_NEW is not set initially, a call to |
1614 | * wake_up_inode() after removing from the hash list will DTRT. | 1582 | * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list |
1615 | * | 1583 | * will DTRT. |
1616 | * This is called with inode_lock held. | ||
1617 | */ | 1584 | */ |
1618 | static void __wait_on_freeing_inode(struct inode *inode) | 1585 | static void __wait_on_freeing_inode(struct inode *inode) |
1619 | { | 1586 | { |
@@ -1621,10 +1588,11 @@ static void __wait_on_freeing_inode(struct inode *inode) | |||
1621 | DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); | 1588 | DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); |
1622 | wq = bit_waitqueue(&inode->i_state, __I_NEW); | 1589 | wq = bit_waitqueue(&inode->i_state, __I_NEW); |
1623 | prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); | 1590 | prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE); |
1624 | spin_unlock(&inode_lock); | 1591 | spin_unlock(&inode->i_lock); |
1592 | spin_unlock(&inode_hash_lock); | ||
1625 | schedule(); | 1593 | schedule(); |
1626 | finish_wait(wq, &wait.wait); | 1594 | finish_wait(wq, &wait.wait); |
1627 | spin_lock(&inode_lock); | 1595 | spin_lock(&inode_hash_lock); |
1628 | } | 1596 | } |
1629 | 1597 | ||
1630 | static __initdata unsigned long ihash_entries; | 1598 | static __initdata unsigned long ihash_entries; |
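For completeness, the waker side that __wait_on_freeing_inode() now pairs with, sketched from the unlock/eviction paths this patch touches (the exact call sites vary):

	/* Waker side of the __I_NEW handshake (sketch): */
	spin_lock(&inode->i_lock);
	inode->i_state &= ~I_NEW;
	wake_up_bit(&inode->i_state, __I_NEW);
	spin_unlock(&inode->i_lock);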
diff --git a/fs/internal.h b/fs/internal.h index 8318059b42c6..b29c46e4e32f 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -125,6 +125,13 @@ extern long do_handle_open(int mountdirfd, | |||
125 | /* | 125 | /* |
126 | * inode.c | 126 | * inode.c |
127 | */ | 127 | */ |
128 | extern spinlock_t inode_sb_list_lock; | ||
129 | |||
130 | /* | ||
131 | * fs-writeback.c | ||
132 | */ | ||
133 | extern void inode_wb_list_del(struct inode *inode); | ||
134 | |||
128 | extern int get_nr_dirty_inodes(void); | 135 | extern int get_nr_dirty_inodes(void); |
129 | extern void evict_inodes(struct super_block *); | 136 | extern void evict_inodes(struct super_block *); |
130 | extern int invalidate_inodes(struct super_block *, bool); | 137 | extern int invalidate_inodes(struct super_block *, bool); |
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 4f9cc0482949..3e93cdd19005 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c | |||
@@ -31,7 +31,7 @@ | |||
31 | * is used to release xattr name/value pair and detach from c->xattrindex. | 31 | * is used to release xattr name/value pair and detach from c->xattrindex. |
32 | * reclaim_xattr_datum(c) | 32 | * reclaim_xattr_datum(c) |
33 | * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when | 33 | * is used to reclaim xattr name/value pairs on the xattr name/value pair cache when |
34 | * memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold | 34 | * memory usage by cache is over c->xdatum_mem_threshold. Currently, this threshold |
35 | * is hard coded as 32KiB. | 35 | * is hard coded as 32KiB. |
36 | * do_verify_xattr_datum(c, xd) | 36 | * do_verify_xattr_datum(c, xd) |
37 | * is used to load the xdatum informations without name/value pair from the medium. | 37 | * is used to load the xdatum informations without name/value pair from the medium. |
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 03b8c240aeda..edfea7a3a747 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c | |||
@@ -293,7 +293,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
293 | return ret; | 293 | return ret; |
294 | } | 294 | } |
295 | 295 | ||
296 | /* called with inode_lock held */ | 296 | /* called with inode->i_lock held */ |
297 | static int logfs_drop_inode(struct inode *inode) | 297 | static int logfs_drop_inode(struct inode *inode) |
298 | { | 298 | { |
299 | struct logfs_super *super = logfs_super(inode->i_sb); | 299 | struct logfs_super *super = logfs_super(inode->i_sb); |
diff --git a/fs/namei.c b/fs/namei.c index d0066e17d45d..3cb616d38d9c 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -992,6 +992,12 @@ int follow_down_one(struct path *path) | |||
992 | return 0; | 992 | return 0; |
993 | } | 993 | } |
994 | 994 | ||
995 | static inline bool managed_dentry_might_block(struct dentry *dentry) | ||
996 | { | ||
997 | return (dentry->d_flags & DCACHE_MANAGE_TRANSIT && | ||
998 | dentry->d_op->d_manage(dentry, true) < 0); | ||
999 | } | ||
1000 | |||
995 | /* | 1001 | /* |
996 | * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we | 1002 | * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we |
997 | * meet a managed dentry and we're not walking to "..". True is returned to | 1003 | * meet a managed dentry and we're not walking to "..". True is returned to |
@@ -1000,19 +1006,26 @@ int follow_down_one(struct path *path) | |||
1000 | static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, | 1006 | static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, |
1001 | struct inode **inode, bool reverse_transit) | 1007 | struct inode **inode, bool reverse_transit) |
1002 | { | 1008 | { |
1003 | while (d_mountpoint(path->dentry)) { | 1009 | for (;;) { |
1004 | struct vfsmount *mounted; | 1010 | struct vfsmount *mounted; |
1005 | if (unlikely(path->dentry->d_flags & DCACHE_MANAGE_TRANSIT) && | 1011 | /* |
1006 | !reverse_transit && | 1012 | * Don't forget we might have a non-mountpoint managed dentry |
1007 | path->dentry->d_op->d_manage(path->dentry, true) < 0) | 1013 | * that wants to block transit. |
1014 | */ | ||
1015 | *inode = path->dentry->d_inode; | ||
1016 | if (!reverse_transit && | ||
1017 | unlikely(managed_dentry_might_block(path->dentry))) | ||
1008 | return false; | 1018 | return false; |
1019 | |||
1020 | if (!d_mountpoint(path->dentry)) | ||
1021 | break; | ||
1022 | |||
1009 | mounted = __lookup_mnt(path->mnt, path->dentry, 1); | 1023 | mounted = __lookup_mnt(path->mnt, path->dentry, 1); |
1010 | if (!mounted) | 1024 | if (!mounted) |
1011 | break; | 1025 | break; |
1012 | path->mnt = mounted; | 1026 | path->mnt = mounted; |
1013 | path->dentry = mounted->mnt_root; | 1027 | path->dentry = mounted->mnt_root; |
1014 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); | 1028 | nd->seq = read_seqcount_begin(&path->dentry->d_seq); |
1015 | *inode = path->dentry->d_inode; | ||
1016 | } | 1029 | } |
1017 | 1030 | ||
1018 | if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) | 1031 | if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) |
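Background for managed_dentry_might_block(): a d_manage() method that cannot make progress without sleeping returns a negative value under rcu-walk, which makes __follow_mount_rcu() return false so the caller can retry in ref-walk mode. A hypothetical minimal implementation:

	static int foofs_d_manage(struct dentry *dentry, bool rcu_walk)
	{
		if (rcu_walk)
			return -ECHILD;	/* cannot sleep: force ref-walk retry */
		/* ref-walk: may sleep, e.g. wait out an in-progress expire */
		return 0;
	}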
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index abdf38d5971d..7237672216c8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -44,6 +44,7 @@ | |||
44 | /* #define NFS_DEBUG_VERBOSE 1 */ | 44 | /* #define NFS_DEBUG_VERBOSE 1 */ |
45 | 45 | ||
46 | static int nfs_opendir(struct inode *, struct file *); | 46 | static int nfs_opendir(struct inode *, struct file *); |
47 | static int nfs_closedir(struct inode *, struct file *); | ||
47 | static int nfs_readdir(struct file *, void *, filldir_t); | 48 | static int nfs_readdir(struct file *, void *, filldir_t); |
48 | static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); | 49 | static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); |
49 | static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); | 50 | static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); |
@@ -64,7 +65,7 @@ const struct file_operations nfs_dir_operations = { | |||
64 | .read = generic_read_dir, | 65 | .read = generic_read_dir, |
65 | .readdir = nfs_readdir, | 66 | .readdir = nfs_readdir, |
66 | .open = nfs_opendir, | 67 | .open = nfs_opendir, |
67 | .release = nfs_release, | 68 | .release = nfs_closedir, |
68 | .fsync = nfs_fsync_dir, | 69 | .fsync = nfs_fsync_dir, |
69 | }; | 70 | }; |
70 | 71 | ||
@@ -133,13 +134,35 @@ const struct inode_operations nfs4_dir_inode_operations = { | |||
133 | 134 | ||
134 | #endif /* CONFIG_NFS_V4 */ | 135 | #endif /* CONFIG_NFS_V4 */ |
135 | 136 | ||
137 | static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred) | ||
138 | { | ||
139 | struct nfs_open_dir_context *ctx; | ||
140 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | ||
141 | if (ctx != NULL) { | ||
142 | ctx->duped = 0; | ||
143 | ctx->dir_cookie = 0; | ||
144 | ctx->dup_cookie = 0; | ||
145 | ctx->cred = get_rpccred(cred); | ||
146 | } else | ||
147 | ctx = ERR_PTR(-ENOMEM); | ||
148 | return ctx; | ||
149 | } | ||
150 | |||
151 | static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) | ||
152 | { | ||
153 | put_rpccred(ctx->cred); | ||
154 | kfree(ctx); | ||
155 | } | ||
156 | |||
136 | /* | 157 | /* |
137 | * Open file | 158 | * Open file |
138 | */ | 159 | */ |
139 | static int | 160 | static int |
140 | nfs_opendir(struct inode *inode, struct file *filp) | 161 | nfs_opendir(struct inode *inode, struct file *filp) |
141 | { | 162 | { |
142 | int res; | 163 | int res = 0; |
164 | struct nfs_open_dir_context *ctx; | ||
165 | struct rpc_cred *cred; | ||
143 | 166 | ||
144 | dfprintk(FILE, "NFS: open dir(%s/%s)\n", | 167 | dfprintk(FILE, "NFS: open dir(%s/%s)\n", |
145 | filp->f_path.dentry->d_parent->d_name.name, | 168 | filp->f_path.dentry->d_parent->d_name.name, |
@@ -147,8 +170,15 @@ nfs_opendir(struct inode *inode, struct file *filp) | |||
147 | 170 | ||
148 | nfs_inc_stats(inode, NFSIOS_VFSOPEN); | 171 | nfs_inc_stats(inode, NFSIOS_VFSOPEN); |
149 | 172 | ||
150 | /* Call generic open code in order to cache credentials */ | 173 | cred = rpc_lookup_cred(); |
151 | res = nfs_open(inode, filp); | 174 | if (IS_ERR(cred)) |
175 | return PTR_ERR(cred); | ||
176 | ctx = alloc_nfs_open_dir_context(cred); | ||
177 | if (IS_ERR(ctx)) { | ||
178 | res = PTR_ERR(ctx); | ||
179 | goto out; | ||
180 | } | ||
181 | filp->private_data = ctx; | ||
152 | if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { | 182 | if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { |
153 | /* This is a mountpoint, so d_revalidate will never | 183 | /* This is a mountpoint, so d_revalidate will never |
154 | * have been called, so we need to refresh the | 184 | * have been called, so we need to refresh the |
@@ -156,9 +186,18 @@ nfs_opendir(struct inode *inode, struct file *filp) | |||
156 | */ | 186 | */ |
157 | __nfs_revalidate_inode(NFS_SERVER(inode), inode); | 187 | __nfs_revalidate_inode(NFS_SERVER(inode), inode); |
158 | } | 188 | } |
189 | out: | ||
190 | put_rpccred(cred); | ||
159 | return res; | 191 | return res; |
160 | } | 192 | } |
161 | 193 | ||
194 | static int | ||
195 | nfs_closedir(struct inode *inode, struct file *filp) | ||
196 | { | ||
197 | put_nfs_open_dir_context(filp->private_data); | ||
198 | return 0; | ||
199 | } | ||
200 | |||
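Taken together, nfs_opendir()/nfs_closedir() above give each open directory its own context in filp->private_data. Its shape, as used by the hunks that follow (the declaration itself lives in a header outside this excerpt; the field list is sketched from the accessors seen here):

	struct nfs_open_dir_context {
		struct rpc_cred	*cred;		/* credential cached at open */
		__u64		dir_cookie;	/* current readdir position */
		__u64		dup_cookie;	/* cookie seen twice, if any */
		int		duped;		/* set on a repeated cookie */
	};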
162 | struct nfs_cache_array_entry { | 201 | struct nfs_cache_array_entry { |
163 | u64 cookie; | 202 | u64 cookie; |
164 | u64 ino; | 203 | u64 ino; |
@@ -284,19 +323,20 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri | |||
284 | { | 323 | { |
285 | loff_t diff = desc->file->f_pos - desc->current_index; | 324 | loff_t diff = desc->file->f_pos - desc->current_index; |
286 | unsigned int index; | 325 | unsigned int index; |
326 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
287 | 327 | ||
288 | if (diff < 0) | 328 | if (diff < 0) |
289 | goto out_eof; | 329 | goto out_eof; |
290 | if (diff >= array->size) { | 330 | if (diff >= array->size) { |
291 | if (array->eof_index >= 0) | 331 | if (array->eof_index >= 0) |
292 | goto out_eof; | 332 | goto out_eof; |
293 | desc->current_index += array->size; | ||
294 | return -EAGAIN; | 333 | return -EAGAIN; |
295 | } | 334 | } |
296 | 335 | ||
297 | index = (unsigned int)diff; | 336 | index = (unsigned int)diff; |
298 | *desc->dir_cookie = array->array[index].cookie; | 337 | *desc->dir_cookie = array->array[index].cookie; |
299 | desc->cache_entry_index = index; | 338 | desc->cache_entry_index = index; |
339 | ctx->duped = 0; | ||
300 | return 0; | 340 | return 0; |
301 | out_eof: | 341 | out_eof: |
302 | desc->eof = 1; | 342 | desc->eof = 1; |
@@ -307,10 +347,18 @@ static | |||
307 | int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) | 347 | int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) |
308 | { | 348 | { |
309 | int i; | 349 | int i; |
350 | loff_t new_pos; | ||
310 | int status = -EAGAIN; | 351 | int status = -EAGAIN; |
352 | struct nfs_open_dir_context *ctx = desc->file->private_data; | ||
311 | 353 | ||
312 | for (i = 0; i < array->size; i++) { | 354 | for (i = 0; i < array->size; i++) { |
313 | if (array->array[i].cookie == *desc->dir_cookie) { | 355 | if (array->array[i].cookie == *desc->dir_cookie) { |
356 | new_pos = desc->current_index + i; | ||
357 | if (new_pos < desc->file->f_pos) { | ||
358 | ctx->dup_cookie = *desc->dir_cookie; | ||
359 | ctx->duped = 1; | ||
360 | } | ||
361 | desc->file->f_pos = new_pos; | ||
314 | desc->cache_entry_index = i; | 362 | desc->cache_entry_index = i; |
315 | return 0; | 363 | return 0; |
316 | } | 364 | } |
@@ -342,6 +390,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) | |||
342 | 390 | ||
343 | if (status == -EAGAIN) { | 391 | if (status == -EAGAIN) { |
344 | desc->last_cookie = array->last_cookie; | 392 | desc->last_cookie = array->last_cookie; |
393 | desc->current_index += array->size; | ||
345 | desc->page_index++; | 394 | desc->page_index++; |
346 | } | 395 | } |
347 | nfs_readdir_release_array(desc->page); | 396 | nfs_readdir_release_array(desc->page); |
@@ -354,7 +403,8 @@ static | |||
354 | int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, | 403 | int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, |
355 | struct nfs_entry *entry, struct file *file, struct inode *inode) | 404 | struct nfs_entry *entry, struct file *file, struct inode *inode) |
356 | { | 405 | { |
357 | struct rpc_cred *cred = nfs_file_cred(file); | 406 | struct nfs_open_dir_context *ctx = file->private_data; |
407 | struct rpc_cred *cred = ctx->cred; | ||
358 | unsigned long timestamp, gencount; | 408 | unsigned long timestamp, gencount; |
359 | int error; | 409 | int error; |
360 | 410 | ||
@@ -693,6 +743,20 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
693 | int i = 0; | 743 | int i = 0; |
694 | int res = 0; | 744 | int res = 0; |
695 | struct nfs_cache_array *array = NULL; | 745 | struct nfs_cache_array *array = NULL; |
746 | struct nfs_open_dir_context *ctx = file->private_data; | ||
747 | |||
748 | if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) { | ||
749 | if (printk_ratelimit()) { | ||
750 | pr_notice("NFS: directory %s/%s contains a readdir loop. " | ||
751 | "Please contact your server vendor. " | ||
752 | "Offending cookie: %llu\n", | ||
753 | file->f_dentry->d_parent->d_name.name, | ||
754 | file->f_dentry->d_name.name, | ||
755 | *desc->dir_cookie); | ||
756 | } | ||
757 | res = -ELOOP; | ||
758 | goto out; | ||
759 | } | ||
696 | 760 | ||
697 | array = nfs_readdir_get_array(desc->page); | 761 | array = nfs_readdir_get_array(desc->page); |
698 | if (IS_ERR(array)) { | 762 | if (IS_ERR(array)) { |
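The -ELOOP guard above has two halves: nfs_readdir_search_for_cookie() records when a server cookie maps back to an earlier file offset, and nfs_do_filldir() refuses to iterate once that same cookie comes around again. Restated as two illustrative helpers (names hypothetical, flow simplified):

	static void note_possible_loop(struct nfs_open_dir_context *ctx,
				       u64 cookie, loff_t new_pos, loff_t f_pos)
	{
		if (new_pos < f_pos) {	/* same cookie at an earlier offset */
			ctx->dup_cookie = cookie;
			ctx->duped = 1;
		}
	}

	static int loop_detected(struct nfs_open_dir_context *ctx, u64 cookie)
	{
		return ctx->duped != 0 && ctx->dup_cookie == cookie;
	}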
@@ -785,6 +849,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
785 | struct inode *inode = dentry->d_inode; | 849 | struct inode *inode = dentry->d_inode; |
786 | nfs_readdir_descriptor_t my_desc, | 850 | nfs_readdir_descriptor_t my_desc, |
787 | *desc = &my_desc; | 851 | *desc = &my_desc; |
852 | struct nfs_open_dir_context *dir_ctx = filp->private_data; | ||
788 | int res; | 853 | int res; |
789 | 854 | ||
790 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", | 855 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", |
@@ -801,7 +866,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
801 | memset(desc, 0, sizeof(*desc)); | 866 | memset(desc, 0, sizeof(*desc)); |
802 | 867 | ||
803 | desc->file = filp; | 868 | desc->file = filp; |
804 | desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; | 869 | desc->dir_cookie = &dir_ctx->dir_cookie; |
805 | desc->decode = NFS_PROTO(inode)->decode_dirent; | 870 | desc->decode = NFS_PROTO(inode)->decode_dirent; |
806 | desc->plus = NFS_USE_READDIRPLUS(inode); | 871 | desc->plus = NFS_USE_READDIRPLUS(inode); |
807 | 872 | ||
@@ -853,6 +918,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) | |||
853 | { | 918 | { |
854 | struct dentry *dentry = filp->f_path.dentry; | 919 | struct dentry *dentry = filp->f_path.dentry; |
855 | struct inode *inode = dentry->d_inode; | 920 | struct inode *inode = dentry->d_inode; |
921 | struct nfs_open_dir_context *dir_ctx = filp->private_data; | ||
856 | 922 | ||
857 | dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", | 923 | dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", |
858 | dentry->d_parent->d_name.name, | 924 | dentry->d_parent->d_name.name, |
@@ -872,7 +938,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) | |||
872 | } | 938 | } |
873 | if (offset != filp->f_pos) { | 939 | if (offset != filp->f_pos) { |
874 | filp->f_pos = offset; | 940 | filp->f_pos = offset; |
875 | nfs_file_open_context(filp)->dir_cookie = 0; | 941 | dir_ctx->dir_cookie = 0; |
942 | dir_ctx->duped = 0; | ||
876 | } | 943 | } |
877 | out: | 944 | out: |
878 | mutex_unlock(&inode->i_mutex); | 945 | mutex_unlock(&inode->i_mutex); |
@@ -1068,7 +1135,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1068 | if (fhandle == NULL || fattr == NULL) | 1135 | if (fhandle == NULL || fattr == NULL) |
1069 | goto out_error; | 1136 | goto out_error; |
1070 | 1137 | ||
1071 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1138 | error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); |
1072 | if (error) | 1139 | if (error) |
1073 | goto out_bad; | 1140 | goto out_bad; |
1074 | if (nfs_compare_fh(NFS_FH(inode), fhandle)) | 1141 | if (nfs_compare_fh(NFS_FH(inode), fhandle)) |
@@ -1224,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru | |||
1224 | parent = dentry->d_parent; | 1291 | parent = dentry->d_parent; |
1225 | /* Protect against concurrent sillydeletes */ | 1292 | /* Protect against concurrent sillydeletes */ |
1226 | nfs_block_sillyrename(parent); | 1293 | nfs_block_sillyrename(parent); |
1227 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1294 | error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); |
1228 | if (error == -ENOENT) | 1295 | if (error == -ENOENT) |
1229 | goto no_entry; | 1296 | goto no_entry; |
1230 | if (error < 0) { | 1297 | if (error < 0) { |
@@ -1562,7 +1629,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, | |||
1562 | if (dentry->d_inode) | 1629 | if (dentry->d_inode) |
1563 | goto out; | 1630 | goto out; |
1564 | if (fhandle->size == 0) { | 1631 | if (fhandle->size == 0) { |
1565 | error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); | 1632 | error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr); |
1566 | if (error) | 1633 | if (error) |
1567 | goto out_error; | 1634 | goto out_error; |
1568 | } | 1635 | } |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d85a534b15cd..3ac5bd695e5e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -326,6 +326,9 @@ nfs_file_fsync(struct file *file, int datasync) | |||
326 | ret = xchg(&ctx->error, 0); | 326 | ret = xchg(&ctx->error, 0); |
327 | if (!ret && status < 0) | 327 | if (!ret && status < 0) |
328 | ret = status; | 328 | ret = status; |
329 | if (!ret && !datasync) | ||
330 | /* application has asked for meta-data sync */ | ||
331 | ret = pnfs_layoutcommit_inode(inode, true); | ||
329 | return ret; | 332 | return ret; |
330 | } | 333 | } |
331 | 334 | ||
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 1084792bc0fe..dcb61548887f 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c | |||
@@ -222,6 +222,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh, | |||
222 | goto out; | 222 | goto out; |
223 | } | 223 | } |
224 | 224 | ||
225 | if (fattr->valid & NFS_ATTR_FATTR_FSID && | ||
226 | !nfs_fsid_equal(&server->fsid, &fattr->fsid)) | ||
227 | memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid)); | ||
228 | |||
225 | inode = nfs_fhget(sb, mntfh, fattr); | 229 | inode = nfs_fhget(sb, mntfh, fattr); |
226 | if (IS_ERR(inode)) { | 230 | if (IS_ERR(inode)) { |
227 | dprintk("nfs_get_root: get root inode failed\n"); | 231 | dprintk("nfs_get_root: get root inode failed\n"); |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 01768e5e2c9b..57bb31ad7a5e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -254,7 +254,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
254 | struct inode *inode = ERR_PTR(-ENOENT); | 254 | struct inode *inode = ERR_PTR(-ENOENT); |
255 | unsigned long hash; | 255 | unsigned long hash; |
256 | 256 | ||
257 | if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) | 257 | nfs_attr_check_mountpoint(sb, fattr); |
258 | |||
259 | if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) | ||
258 | goto out_no_inode; | 260 | goto out_no_inode; |
259 | if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) | 261 | if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) |
260 | goto out_no_inode; | 262 | goto out_no_inode; |
@@ -298,8 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
298 | if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) | 300 | if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) |
299 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); | 301 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); |
300 | /* Deal with crossing mountpoints */ | 302 | /* Deal with crossing mountpoints */ |
301 | if ((fattr->valid & NFS_ATTR_FATTR_FSID) | 303 | if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT || |
302 | && !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { | 304 | fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) { |
303 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) | 305 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) |
304 | inode->i_op = &nfs_referral_inode_operations; | 306 | inode->i_op = &nfs_referral_inode_operations; |
305 | else | 307 | else |
@@ -639,7 +641,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cr | |||
639 | ctx->mode = f_mode; | 641 | ctx->mode = f_mode; |
640 | ctx->flags = 0; | 642 | ctx->flags = 0; |
641 | ctx->error = 0; | 643 | ctx->error = 0; |
642 | ctx->dir_cookie = 0; | ||
643 | nfs_init_lock_context(&ctx->lock_context); | 644 | nfs_init_lock_context(&ctx->lock_context); |
644 | ctx->lock_context.open_context = ctx; | 645 | ctx->lock_context.open_context = ctx; |
645 | INIT_LIST_HEAD(&ctx->list); | 646 | INIT_LIST_HEAD(&ctx->list); |
@@ -1471,6 +1472,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) | |||
1471 | nfsi->delegation_state = 0; | 1472 | nfsi->delegation_state = 0; |
1472 | init_rwsem(&nfsi->rwsem); | 1473 | init_rwsem(&nfsi->rwsem); |
1473 | nfsi->layout = NULL; | 1474 | nfsi->layout = NULL; |
1475 | atomic_set(&nfsi->commits_outstanding, 0); | ||
1474 | #endif | 1476 | #endif |
1475 | } | 1477 | } |
1476 | 1478 | ||
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 72e0bddf7a2f..ce118ce885dd 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -39,6 +39,12 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp) | |||
39 | return 0; | 39 | return 0; |
40 | } | 40 | } |
41 | 41 | ||
42 | static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr) | ||
43 | { | ||
44 | if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid)) | ||
45 | fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT; | ||
46 | } | ||
47 | |||
42 | struct nfs_clone_mount { | 48 | struct nfs_clone_mount { |
43 | const struct super_block *sb; | 49 | const struct super_block *sb; |
44 | const struct dentry *dentry; | 50 | const struct dentry *dentry; |
@@ -214,6 +220,7 @@ extern const u32 nfs41_maxwrite_overhead; | |||
214 | /* nfs4proc.c */ | 220 | /* nfs4proc.c */ |
215 | #ifdef CONFIG_NFS_V4 | 221 | #ifdef CONFIG_NFS_V4 |
216 | extern struct rpc_procinfo nfs4_procedures[]; | 222 | extern struct rpc_procinfo nfs4_procedures[]; |
223 | void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *); | ||
217 | #endif | 224 | #endif |
218 | 225 | ||
219 | extern int nfs4_init_ds_session(struct nfs_client *clp); | 226 | extern int nfs4_init_ds_session(struct nfs_client *clp); |
@@ -276,11 +283,25 @@ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, | |||
276 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); | 283 | extern void nfs_read_prepare(struct rpc_task *task, void *calldata); |
277 | 284 | ||
278 | /* write.c */ | 285 | /* write.c */ |
286 | extern void nfs_commit_free(struct nfs_write_data *p); | ||
279 | extern int nfs_initiate_write(struct nfs_write_data *data, | 287 | extern int nfs_initiate_write(struct nfs_write_data *data, |
280 | struct rpc_clnt *clnt, | 288 | struct rpc_clnt *clnt, |
281 | const struct rpc_call_ops *call_ops, | 289 | const struct rpc_call_ops *call_ops, |
282 | int how); | 290 | int how); |
283 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); | 291 | extern void nfs_write_prepare(struct rpc_task *task, void *calldata); |
292 | extern int nfs_initiate_commit(struct nfs_write_data *data, | ||
293 | struct rpc_clnt *clnt, | ||
294 | const struct rpc_call_ops *call_ops, | ||
295 | int how); | ||
296 | extern void nfs_init_commit(struct nfs_write_data *data, | ||
297 | struct list_head *head, | ||
298 | struct pnfs_layout_segment *lseg); | ||
299 | void nfs_retry_commit(struct list_head *page_list, | ||
300 | struct pnfs_layout_segment *lseg); | ||
301 | void nfs_commit_clear_lock(struct nfs_inode *nfsi); | ||
302 | void nfs_commitdata_release(void *data); | ||
303 | void nfs_commit_release_pages(struct nfs_write_data *data); | ||
304 | |||
284 | #ifdef CONFIG_MIGRATION | 305 | #ifdef CONFIG_MIGRATION |
285 | extern int nfs_migrate_page(struct address_space *, | 306 | extern int nfs_migrate_page(struct address_space *, |
286 | struct page *, struct page *); | 307 | struct page *, struct page *); |
@@ -296,12 +317,14 @@ extern int nfs4_init_client(struct nfs_client *clp, | |||
296 | rpc_authflavor_t authflavour, | 317 | rpc_authflavor_t authflavour, |
297 | int noresvport); | 318 | int noresvport); |
298 | extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); | 319 | extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data); |
299 | extern int _nfs4_call_sync(struct nfs_server *server, | 320 | extern int _nfs4_call_sync(struct rpc_clnt *clnt, |
321 | struct nfs_server *server, | ||
300 | struct rpc_message *msg, | 322 | struct rpc_message *msg, |
301 | struct nfs4_sequence_args *args, | 323 | struct nfs4_sequence_args *args, |
302 | struct nfs4_sequence_res *res, | 324 | struct nfs4_sequence_res *res, |
303 | int cache_reply); | 325 | int cache_reply); |
304 | extern int _nfs4_call_sync_session(struct nfs_server *server, | 326 | extern int _nfs4_call_sync_session(struct rpc_clnt *clnt, |
327 | struct nfs_server *server, | ||
305 | struct rpc_message *msg, | 328 | struct rpc_message *msg, |
306 | struct nfs4_sequence_args *args, | 329 | struct nfs4_sequence_args *args, |
307 | struct nfs4_sequence_res *res, | 330 | struct nfs4_sequence_res *res, |
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index bf1c68009ffd..ad92bf731ff5 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/string.h> | 15 | #include <linux/string.h> |
16 | #include <linux/sunrpc/clnt.h> | 16 | #include <linux/sunrpc/clnt.h> |
17 | #include <linux/vfs.h> | 17 | #include <linux/vfs.h> |
18 | #include <linux/sunrpc/gss_api.h> | ||
18 | #include "internal.h" | 19 | #include "internal.h" |
19 | 20 | ||
20 | #define NFSDBG_FACILITY NFSDBG_VFS | 21 | #define NFSDBG_FACILITY NFSDBG_VFS |
@@ -27,7 +28,8 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; | |||
27 | 28 | ||
28 | static struct vfsmount *nfs_do_submount(struct dentry *dentry, | 29 | static struct vfsmount *nfs_do_submount(struct dentry *dentry, |
29 | struct nfs_fh *fh, | 30 | struct nfs_fh *fh, |
30 | struct nfs_fattr *fattr); | 31 | struct nfs_fattr *fattr, |
32 | rpc_authflavor_t authflavor); | ||
31 | 33 | ||
32 | /* | 34 | /* |
33 | * nfs_path - reconstruct the path given an arbitrary dentry | 35 | * nfs_path - reconstruct the path given an arbitrary dentry |
@@ -116,6 +118,100 @@ Elong: | |||
116 | return ERR_PTR(-ENAMETOOLONG); | 118 | return ERR_PTR(-ENAMETOOLONG); |
117 | } | 119 | } |
118 | 120 | ||
121 | #ifdef CONFIG_NFS_V4 | ||
122 | static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors, struct inode *inode) | ||
123 | { | ||
124 | struct gss_api_mech *mech; | ||
125 | struct xdr_netobj oid; | ||
126 | int i; | ||
127 | rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; | ||
128 | |||
129 | for (i = 0; i < flavors->num_flavors; i++) { | ||
130 | struct nfs4_secinfo_flavor *flavor; | ||
131 | flavor = &flavors->flavors[i]; | ||
132 | |||
133 | if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) { | ||
134 | pseudoflavor = flavor->flavor; | ||
135 | break; | ||
136 | } else if (flavor->flavor == RPC_AUTH_GSS) { | ||
137 | oid.len = flavor->gss.sec_oid4.len; | ||
138 | oid.data = flavor->gss.sec_oid4.data; | ||
139 | mech = gss_mech_get_by_OID(&oid); | ||
140 | if (!mech) | ||
141 | continue; | ||
142 | pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service); | ||
143 | gss_mech_put(mech); | ||
144 | break; | ||
145 | } | ||
146 | } | ||
147 | |||
148 | return pseudoflavor; | ||
149 | } | ||
150 | |||
151 | static rpc_authflavor_t nfs_negotiate_security(const struct dentry *parent, const struct dentry *dentry) | ||
152 | { | ||
153 | int status = 0; | ||
154 | struct page *page; | ||
155 | struct nfs4_secinfo_flavors *flavors; | ||
156 | int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); | ||
157 | rpc_authflavor_t flavor = RPC_AUTH_UNIX; | ||
158 | |||
159 | secinfo = NFS_PROTO(parent->d_inode)->secinfo; | ||
160 | if (secinfo != NULL) { | ||
161 | page = alloc_page(GFP_KERNEL); | ||
162 | if (!page) { | ||
163 | status = -ENOMEM; | ||
164 | goto out; | ||
165 | } | ||
166 | flavors = page_address(page); | ||
167 | status = secinfo(parent->d_inode, &dentry->d_name, flavors); | ||
168 | flavor = nfs_find_best_sec(flavors, dentry->d_inode); | ||
169 | put_page(page); | ||
170 | } | ||
171 | |||
172 | return flavor; | ||
173 | |||
174 | out: | ||
175 | status = -ENOMEM; | ||
176 | return status; | ||
177 | } | ||
178 | |||
179 | static rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent, | ||
180 | struct dentry *dentry, struct path *path, | ||
181 | struct nfs_fh *fh, struct nfs_fattr *fattr) | ||
182 | { | ||
183 | rpc_authflavor_t flavor; | ||
184 | struct rpc_clnt *clone; | ||
185 | struct rpc_auth *auth; | ||
186 | int err; | ||
187 | |||
188 | flavor = nfs_negotiate_security(parent, path->dentry); | ||
189 | if (flavor < 0) | ||
190 | goto out; | ||
191 | clone = rpc_clone_client(server->client); | ||
192 | auth = rpcauth_create(flavor, clone); | ||
193 | if (!auth) { | ||
194 | flavor = -EIO; | ||
195 | goto out; | ||
196 | } | ||
197 | err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode, | ||
198 | &path->dentry->d_name, | ||
199 | fh, fattr); | ||
200 | if (err < 0) | ||
201 | flavor = err; | ||
202 | out: | ||
203 | return flavor; | ||
204 | } | ||
205 | #else /* CONFIG_NFS_V4 */ | ||
206 | static inline rpc_authflavor_t nfs_lookup_with_sec(struct nfs_server *server, | ||
207 | struct dentry *parent, struct dentry *dentry, | ||
208 | struct path *path, struct nfs_fh *fh, | ||
209 | struct nfs_fattr *fattr) | ||
210 | { | ||
211 | return -EPERM; | ||
212 | } | ||
213 | #endif /* CONFIG_NFS_V4 */ | ||
214 | |||
119 | /* | 215 | /* |
120 | * nfs_d_automount - Handle crossing a mountpoint on the server | 216 | * nfs_d_automount - Handle crossing a mountpoint on the server |
121 | * @path - The mountpoint | 217 | * @path - The mountpoint |
@@ -136,6 +232,7 @@ struct vfsmount *nfs_d_automount(struct path *path) | |||
136 | struct nfs_fh *fh = NULL; | 232 | struct nfs_fh *fh = NULL; |
137 | struct nfs_fattr *fattr = NULL; | 233 | struct nfs_fattr *fattr = NULL; |
138 | int err; | 234 | int err; |
235 | rpc_authflavor_t flavor = 1; | ||
139 | 236 | ||
140 | dprintk("--> nfs_d_automount()\n"); | 237 | dprintk("--> nfs_d_automount()\n"); |
141 | 238 | ||
@@ -153,9 +250,16 @@ struct vfsmount *nfs_d_automount(struct path *path) | |||
153 | 250 | ||
154 | /* Look it up again to get its attributes */ | 251 | /* Look it up again to get its attributes */ |
155 | parent = dget_parent(path->dentry); | 252 | parent = dget_parent(path->dentry); |
156 | err = server->nfs_client->rpc_ops->lookup(parent->d_inode, | 253 | err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, |
157 | &path->dentry->d_name, | 254 | &path->dentry->d_name, |
158 | fh, fattr); | 255 | fh, fattr); |
256 | if (err == -EPERM) { | ||
257 | flavor = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr); | ||
258 | if (flavor < 0) | ||
259 | err = flavor; | ||
260 | else | ||
261 | err = 0; | ||
262 | } | ||
159 | dput(parent); | 263 | dput(parent); |
160 | if (err != 0) { | 264 | if (err != 0) { |
161 | mnt = ERR_PTR(err); | 265 | mnt = ERR_PTR(err); |
@@ -165,7 +269,7 @@ struct vfsmount *nfs_d_automount(struct path *path) | |||
165 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) | 269 | if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) |
166 | mnt = nfs_do_refmount(path->dentry); | 270 | mnt = nfs_do_refmount(path->dentry); |
167 | else | 271 | else |
168 | mnt = nfs_do_submount(path->dentry, fh, fattr); | 272 | mnt = nfs_do_submount(path->dentry, fh, fattr, flavor); |
169 | if (IS_ERR(mnt)) | 273 | if (IS_ERR(mnt)) |
170 | goto out; | 274 | goto out; |
171 | 275 | ||
@@ -232,17 +336,20 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, | |||
232 | * @dentry - parent directory | 336 | * @dentry - parent directory |
233 | * @fh - filehandle for new root dentry | 337 | * @fh - filehandle for new root dentry |
234 | * @fattr - attributes for new root inode | 338 | * @fattr - attributes for new root inode |
339 | * @authflavor - security flavor to use when performing the mount | ||
235 | * | 340 | * |
236 | */ | 341 | */ |
237 | static struct vfsmount *nfs_do_submount(struct dentry *dentry, | 342 | static struct vfsmount *nfs_do_submount(struct dentry *dentry, |
238 | struct nfs_fh *fh, | 343 | struct nfs_fh *fh, |
239 | struct nfs_fattr *fattr) | 344 | struct nfs_fattr *fattr, |
345 | rpc_authflavor_t authflavor) | ||
240 | { | 346 | { |
241 | struct nfs_clone_mount mountdata = { | 347 | struct nfs_clone_mount mountdata = { |
242 | .sb = dentry->d_sb, | 348 | .sb = dentry->d_sb, |
243 | .dentry = dentry, | 349 | .dentry = dentry, |
244 | .fh = fh, | 350 | .fh = fh, |
245 | .fattr = fattr, | 351 | .fattr = fattr, |
352 | .authflavor = authflavor, | ||
246 | }; | 353 | }; |
247 | struct vfsmount *mnt = ERR_PTR(-ENOMEM); | 354 | struct vfsmount *mnt = ERR_PTR(-ENOMEM); |
248 | char *page = (char *) __get_free_page(GFP_USER); | 355 | char *page = (char *) __get_free_page(GFP_USER); |
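Editor's note: taken together, the namespace.c changes make security negotiation lazy. The submount lookup is first tried with the parent's existing RPC client, and only an -EPERM answer (the mapping of NFS4ERR_WRONGSEC added in the nfs4proc.c hunks further down) pays for the SECINFO round trip, a cloned client carrying the negotiated flavor, and a retried LOOKUP whose flavor is then threaded into nfs_do_submount(). A condensed sketch of that fallback shape, with hypothetical do_lookup()/negotiate_flavor() helpers in place of the rpc_ops lookup and nfs_negotiate_security():

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-ins: the server here only accepts krb5 (390003). */
static int do_lookup(int flavor) { return flavor == 390003 ? 0 : -EPERM; }
static int negotiate_flavor(void) { return 390003; }

static int automount_lookup(int default_flavor, int *flavor_out)
{
	int err = do_lookup(default_flavor);

	if (err == -EPERM) {            /* server rejected our security flavor */
		int flavor = negotiate_flavor();
		if (flavor < 0)
			return flavor;  /* negotiation itself failed */
		err = do_lookup(flavor);
		if (err == 0)
			*flavor_out = flavor; /* the submount reuses this flavor */
	}
	return err;
}

int main(void)
{
	int flavor = 1; /* AUTH_UNIX */
	int err = automount_lookup(flavor, &flavor);
	printf("err=%d flavor=%d\n", err, flavor);
	return 0;
}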
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index d0c80d8b3f96..38053d823eb0 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -141,7 +141,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
141 | } | 141 | } |
142 | 142 | ||
143 | static int | 143 | static int |
144 | nfs3_proc_lookup(struct inode *dir, struct qstr *name, | 144 | nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, |
145 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 145 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) |
146 | { | 146 | { |
147 | struct nfs3_diropargs arg = { | 147 | struct nfs3_diropargs arg = { |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c64be1cff080..e1c261ddd65d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -57,7 +57,8 @@ enum nfs4_session_state { | |||
57 | struct nfs4_minor_version_ops { | 57 | struct nfs4_minor_version_ops { |
58 | u32 minor_version; | 58 | u32 minor_version; |
59 | 59 | ||
60 | int (*call_sync)(struct nfs_server *server, | 60 | int (*call_sync)(struct rpc_clnt *clnt, |
61 | struct nfs_server *server, | ||
61 | struct rpc_message *msg, | 62 | struct rpc_message *msg, |
62 | struct nfs4_sequence_args *args, | 63 | struct nfs4_sequence_args *args, |
63 | struct nfs4_sequence_res *res, | 64 | struct nfs4_sequence_res *res, |
@@ -262,6 +263,8 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *); | |||
262 | extern int nfs4_init_session(struct nfs_server *server); | 263 | extern int nfs4_init_session(struct nfs_server *server); |
263 | extern int nfs4_proc_get_lease_time(struct nfs_client *clp, | 264 | extern int nfs4_proc_get_lease_time(struct nfs_client *clp, |
264 | struct nfs_fsinfo *fsinfo); | 265 | struct nfs_fsinfo *fsinfo); |
266 | extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, | ||
267 | bool sync); | ||
265 | 268 | ||
266 | static inline bool | 269 | static inline bool |
267 | is_ds_only_client(struct nfs_client *clp) | 270 | is_ds_only_client(struct nfs_client *clp) |
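Editor's note: the ->call_sync signature change is the plumbing behind the retry above. Every synchronous NFSv4 operation now names its struct rpc_clnt explicitly instead of implicitly using server->client, so a one-off operation can run on a clone carrying a different auth flavor. A toy sketch of the shape, with simplified stand-in types rather than the kernel's:

#include <stdio.h>

struct rpc_clnt { const char *auth_flavor; };   /* toy stand-ins */
struct nfs_server { struct rpc_clnt *client; };
struct rpc_message { const char *proc; };

/* After the change: the caller names the transport explicitly. */
struct minor_ops {
	int (*call_sync)(struct rpc_clnt *clnt, struct nfs_server *server,
			 struct rpc_message *msg);
};

static int call_sync_v40(struct rpc_clnt *clnt, struct nfs_server *server,
			 struct rpc_message *msg)
{
	(void)server;                       /* session-less minor version */
	printf("%s via client with flavor %s\n", msg->proc, clnt->auth_flavor);
	return 0;
}

int main(void)
{
	struct rpc_clnt def = { "AUTH_UNIX" }, clone = { "krb5" };
	struct nfs_server srv = { &def };
	struct minor_ops ops = { call_sync_v40 };
	struct rpc_message msg = { "LOOKUP" };

	ops.call_sync(srv.client, &srv, &msg);  /* the common case */
	ops.call_sync(&clone, &srv, &msg);      /* one-off negotiated flavor */
	return 0;
}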
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 428558464817..6f8192f4cfc7 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -154,6 +154,23 @@ static int filelayout_read_done_cb(struct rpc_task *task, | |||
154 | } | 154 | } |
155 | 155 | ||
156 | /* | 156 | /* |
157 | * We reference the rpc_cred of the first WRITE that triggers the need for | ||
158 | * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. | ||
159 | * rfc5661 is not clear about which credential should be used. | ||
160 | */ | ||
161 | static void | ||
162 | filelayout_set_layoutcommit(struct nfs_write_data *wdata) | ||
163 | { | ||
164 | if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds || | ||
165 | wdata->res.verf->committed == NFS_FILE_SYNC) | ||
166 | return; | ||
167 | |||
168 | pnfs_set_layoutcommit(wdata); | ||
169 | dprintk("%s inode %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino, | ||
170 | (unsigned long) wdata->lseg->pls_end_pos); | ||
171 | } | ||
172 | |||
173 | /* | ||
157 | * Call ops for the async read/write cases | 174 | * Call ops for the async read/write cases |
158 | * In the case of dense layouts, the offset needs to be reset to its | 175 | * In the case of dense layouts, the offset needs to be reset to its |
159 | * original value. | 176 | * original value. |
@@ -210,6 +227,38 @@ static int filelayout_write_done_cb(struct rpc_task *task, | |||
210 | return -EAGAIN; | 227 | return -EAGAIN; |
211 | } | 228 | } |
212 | 229 | ||
230 | filelayout_set_layoutcommit(data); | ||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | /* Fake up some data that will cause nfs_commit_release to retry the writes. */ | ||
235 | static void prepare_to_resend_writes(struct nfs_write_data *data) | ||
236 | { | ||
237 | struct nfs_page *first = nfs_list_entry(data->pages.next); | ||
238 | |||
239 | data->task.tk_status = 0; | ||
240 | memcpy(data->verf.verifier, first->wb_verf.verifier, | ||
241 | sizeof(first->wb_verf.verifier)); | ||
242 | data->verf.verifier[0]++; /* ensure verifier mismatch */ | ||
243 | } | ||
244 | |||
245 | static int filelayout_commit_done_cb(struct rpc_task *task, | ||
246 | struct nfs_write_data *data) | ||
247 | { | ||
248 | int reset = 0; | ||
249 | |||
250 | if (filelayout_async_handle_error(task, data->args.context->state, | ||
251 | data->ds_clp, &reset) == -EAGAIN) { | ||
252 | dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n", | ||
253 | __func__, data->ds_clp, data->ds_clp->cl_session); | ||
254 | if (reset) { | ||
255 | prepare_to_resend_writes(data); | ||
256 | filelayout_set_lo_fail(data->lseg); | ||
257 | } else | ||
258 | nfs_restart_rpc(task, data->ds_clp); | ||
259 | return -EAGAIN; | ||
260 | } | ||
261 | |||
213 | return 0; | 262 | return 0; |
214 | } | 263 | } |
215 | 264 | ||
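Editor's note: prepare_to_resend_writes() is a neat trick. Rather than re-queueing pages by hand, it copies the verifier from the first page and then perturbs its first word, so the verifier comparison run in the commit-release path sees a guaranteed mismatch and resends the writes through the normal retry machinery. A small sketch of the mechanism, using a hypothetical two-word verifier struct rather than the kernel's:

#include <stdio.h>
#include <string.h>

struct verifier { unsigned int v[2]; };   /* toy stand-in for the write verifier */

/* Copy the verifier the pages expect, then make sure it can never match. */
static void force_mismatch(struct verifier *got, const struct verifier *want)
{
	memcpy(got, want, sizeof(*got));
	got->v[0]++;                          /* guaranteed != want->v[0] */
}

int main(void)
{
	struct verifier want = { { 0xdeadbeef, 0x1 } }, got;

	force_mismatch(&got, &want);
	/* the commit-release comparison sees this and re-queues the writes */
	printf("mismatch=%d\n", memcmp(&got, &want, sizeof(want)) != 0);
	return 0;
}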
@@ -240,6 +289,16 @@ static void filelayout_write_release(void *data) | |||
240 | wdata->mds_ops->rpc_release(data); | 289 | wdata->mds_ops->rpc_release(data); |
241 | } | 290 | } |
242 | 291 | ||
292 | static void filelayout_commit_release(void *data) | ||
293 | { | ||
294 | struct nfs_write_data *wdata = (struct nfs_write_data *)data; | ||
295 | |||
296 | nfs_commit_release_pages(wdata); | ||
297 | if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding)) | ||
298 | nfs_commit_clear_lock(NFS_I(wdata->inode)); | ||
299 | nfs_commitdata_release(wdata); | ||
300 | } | ||
301 | |||
243 | struct rpc_call_ops filelayout_read_call_ops = { | 302 | struct rpc_call_ops filelayout_read_call_ops = { |
244 | .rpc_call_prepare = filelayout_read_prepare, | 303 | .rpc_call_prepare = filelayout_read_prepare, |
245 | .rpc_call_done = filelayout_read_call_done, | 304 | .rpc_call_done = filelayout_read_call_done, |
@@ -252,6 +311,12 @@ struct rpc_call_ops filelayout_write_call_ops = { | |||
252 | .rpc_release = filelayout_write_release, | 311 | .rpc_release = filelayout_write_release, |
253 | }; | 312 | }; |
254 | 313 | ||
314 | struct rpc_call_ops filelayout_commit_call_ops = { | ||
315 | .rpc_call_prepare = filelayout_write_prepare, | ||
316 | .rpc_call_done = filelayout_write_call_done, | ||
317 | .rpc_release = filelayout_commit_release, | ||
318 | }; | ||
319 | |||
255 | static enum pnfs_try_status | 320 | static enum pnfs_try_status |
256 | filelayout_read_pagelist(struct nfs_read_data *data) | 321 | filelayout_read_pagelist(struct nfs_read_data *data) |
257 | { | 322 | { |
@@ -320,10 +385,6 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) | |||
320 | data->inode->i_ino, sync, (size_t) data->args.count, offset, | 385 | data->inode->i_ino, sync, (size_t) data->args.count, offset, |
321 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); | 386 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); |
322 | 387 | ||
323 | /* We can't handle commit to ds yet */ | ||
324 | if (!FILELAYOUT_LSEG(lseg)->commit_through_mds) | ||
325 | data->args.stable = NFS_FILE_SYNC; | ||
326 | |||
327 | data->write_done_cb = filelayout_write_done_cb; | 388 | data->write_done_cb = filelayout_write_done_cb; |
328 | data->ds_clp = ds->ds_clp; | 389 | data->ds_clp = ds->ds_clp; |
329 | fh = nfs4_fl_select_ds_fh(lseg, j); | 390 | fh = nfs4_fl_select_ds_fh(lseg, j); |
@@ -441,12 +502,33 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
441 | struct nfs4_layoutget_res *lgr, | 502 | struct nfs4_layoutget_res *lgr, |
442 | struct nfs4_deviceid *id) | 503 | struct nfs4_deviceid *id) |
443 | { | 504 | { |
444 | uint32_t *p = (uint32_t *)lgr->layout.buf; | 505 | struct xdr_stream stream; |
506 | struct xdr_buf buf = { | ||
507 | .pages = lgr->layoutp->pages, | ||
508 | .page_len = lgr->layoutp->len, | ||
509 | .buflen = lgr->layoutp->len, | ||
510 | .len = lgr->layoutp->len, | ||
511 | }; | ||
512 | struct page *scratch; | ||
513 | __be32 *p; | ||
445 | uint32_t nfl_util; | 514 | uint32_t nfl_util; |
446 | int i; | 515 | int i; |
447 | 516 | ||
448 | dprintk("%s: set_layout_map Begin\n", __func__); | 517 | dprintk("%s: set_layout_map Begin\n", __func__); |
449 | 518 | ||
519 | scratch = alloc_page(GFP_KERNEL); | ||
520 | if (!scratch) | ||
521 | return -ENOMEM; | ||
522 | |||
523 | xdr_init_decode(&stream, &buf, NULL); | ||
524 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
525 | |||
526 | /* 20 = nfl_util (4), first_stripe_index (4), pattern_offset (8), | ||
527 | * num_fh (4) */ | ||
528 | p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20); | ||
529 | if (unlikely(!p)) | ||
530 | goto out_err; | ||
531 | |||
450 | memcpy(id, p, sizeof(*id)); | 532 | memcpy(id, p, sizeof(*id)); |
451 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | 533 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); |
452 | print_deviceid(id); | 534 | print_deviceid(id); |
@@ -468,32 +550,57 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, | |||
468 | __func__, nfl_util, fl->num_fh, fl->first_stripe_index, | 550 | __func__, nfl_util, fl->num_fh, fl->first_stripe_index, |
469 | fl->pattern_offset); | 551 | fl->pattern_offset); |
470 | 552 | ||
553 | if (!fl->num_fh) | ||
554 | goto out_err; | ||
555 | |||
471 | fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), | 556 | fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), |
472 | GFP_KERNEL); | 557 | GFP_KERNEL); |
473 | if (!fl->fh_array) | 558 | if (!fl->fh_array) |
474 | return -ENOMEM; | 559 | goto out_err; |
475 | 560 | ||
476 | for (i = 0; i < fl->num_fh; i++) { | 561 | for (i = 0; i < fl->num_fh; i++) { |
477 | /* Do we want to use a mempool here? */ | 562 | /* Do we want to use a mempool here? */ |
478 | fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); | 563 | fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); |
479 | if (!fl->fh_array[i]) { | 564 | if (!fl->fh_array[i]) |
480 | filelayout_free_fh_array(fl); | 565 | goto out_err_free; |
481 | return -ENOMEM; | 566 | |
482 | } | 567 | p = xdr_inline_decode(&stream, 4); |
568 | if (unlikely(!p)) | ||
569 | goto out_err_free; | ||
483 | fl->fh_array[i]->size = be32_to_cpup(p++); | 570 | fl->fh_array[i]->size = be32_to_cpup(p++); |
484 | if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { | 571 | if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { |
485 | printk(KERN_ERR "Too big fh %d received %d\n", | 572 | printk(KERN_ERR "Too big fh %d received %d\n", |
486 | i, fl->fh_array[i]->size); | 573 | i, fl->fh_array[i]->size); |
487 | filelayout_free_fh_array(fl); | 574 | goto out_err_free; |
488 | return -EIO; | ||
489 | } | 575 | } |
576 | |||
577 | p = xdr_inline_decode(&stream, fl->fh_array[i]->size); | ||
578 | if (unlikely(!p)) | ||
579 | goto out_err_free; | ||
490 | memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); | 580 | memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); |
491 | p += XDR_QUADLEN(fl->fh_array[i]->size); | ||
492 | dprintk("DEBUG: %s: fh len %d\n", __func__, | 581 | dprintk("DEBUG: %s: fh len %d\n", __func__, |
493 | fl->fh_array[i]->size); | 582 | fl->fh_array[i]->size); |
494 | } | 583 | } |
495 | 584 | ||
585 | __free_page(scratch); | ||
496 | return 0; | 586 | return 0; |
587 | |||
588 | out_err_free: | ||
589 | filelayout_free_fh_array(fl); | ||
590 | out_err: | ||
591 | __free_page(scratch); | ||
592 | return -EIO; | ||
593 | } | ||
594 | |||
595 | static void | ||
596 | filelayout_free_lseg(struct pnfs_layout_segment *lseg) | ||
597 | { | ||
598 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | ||
599 | |||
600 | dprintk("--> %s\n", __func__); | ||
601 | nfs4_fl_put_deviceid(fl->dsaddr); | ||
602 | kfree(fl->commit_buckets); | ||
603 | _filelayout_free_lseg(fl); | ||
497 | } | 604 | } |
498 | 605 | ||
499 | static struct pnfs_layout_segment * | 606 | static struct pnfs_layout_segment * |
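Editor's note: the decode rewrite above replaces raw __be32 pointer walking with the xdr_stream discipline. Every read is first reserved with xdr_inline_decode(), which returns NULL when the request would run past the received data and stitches through the scratch page when an item straddles a page boundary, so a short or malformed LAYOUTGET reply now fails cleanly with -EIO instead of overrunning the buffer. A user-space sketch of the same bounded-read pattern, with a hypothetical byte-stream struct in place of struct xdr_stream:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct stream { const uint8_t *p, *end; };

/* Reserve n bytes; NULL means the buffer is too short (decode error). */
static const uint8_t *inline_decode(struct stream *s, size_t n)
{
	if ((size_t)(s->end - s->p) < n)
		return NULL;
	const uint8_t *ret = s->p;
	s->p += n;
	return ret;
}

static int decode_fh(struct stream *s, uint8_t *out, size_t max)
{
	const uint8_t *p = inline_decode(s, 4);
	if (!p)
		return -1;                    /* short buffer */
	uint32_t len = (uint32_t)p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
	if (len > max)
		return -1;                    /* oversized filehandle */
	p = inline_decode(s, len);
	if (!p)
		return -1;                    /* the length field lied */
	memcpy(out, p, len);
	return (int)len;
}

int main(void)
{
	uint8_t buf[] = { 0, 0, 0, 3, 'a', 'b', 'c' }, fh[16];
	struct stream s = { buf, buf + sizeof(buf) };
	printf("fh len=%d\n", decode_fh(&s, fh, sizeof(fh)));
	return 0;
}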
@@ -514,17 +621,28 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, | |||
514 | _filelayout_free_lseg(fl); | 621 | _filelayout_free_lseg(fl); |
515 | return NULL; | 622 | return NULL; |
516 | } | 623 | } |
517 | return &fl->generic_hdr; | ||
518 | } | ||
519 | 624 | ||
520 | static void | 625 | /* This assumes there is only one IOMODE_RW lseg. What |
521 | filelayout_free_lseg(struct pnfs_layout_segment *lseg) | 626 | * we really want to do is have a layout_hdr level |
522 | { | 627 | * dictionary of <multipath_list4, fh> keys, each |
523 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | 628 | * associated with a struct list_head, populated by calls |
524 | 629 | * to filelayout_write_pagelist(). | |
525 | dprintk("--> %s\n", __func__); | 630 | * */ |
526 | nfs4_fl_put_deviceid(fl->dsaddr); | 631 | if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) { |
527 | _filelayout_free_lseg(fl); | 632 | int i; |
633 | int size = (fl->stripe_type == STRIPE_SPARSE) ? | ||
634 | fl->dsaddr->ds_num : fl->dsaddr->stripe_count; | ||
635 | |||
636 | fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL); | ||
637 | if (!fl->commit_buckets) { | ||
638 | filelayout_free_lseg(&fl->generic_hdr); | ||
639 | return NULL; | ||
640 | } | ||
641 | fl->number_of_buckets = size; | ||
642 | for (i = 0; i < size; i++) | ||
643 | INIT_LIST_HEAD(&fl->commit_buckets[i]); | ||
644 | } | ||
645 | return &fl->generic_hdr; | ||
528 | } | 646 | } |
529 | 647 | ||
530 | /* | 648 | /* |
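Editor's note: filelayout_alloc_lseg() now also provisions the commit machinery. For an RW layout that commits through the data servers it allocates one list head per destination: the bucket count is ds_num for sparse striping (several stripe indices can map to one DS) and stripe_count for dense striping, and each dirty page is later routed to its bucket from its file offset. A toy sketch of the sizing and index selection, assuming simplified stripe math in place of nfs4_fl_calc_j_index()/nfs4_fl_calc_ds_index():

#include <stdio.h>

enum stripe_type { STRIPE_SPARSE, STRIPE_DENSE };

struct layout {
	enum stripe_type type;
	unsigned int stripe_count;          /* entries in stripe_indices[] */
	unsigned int ds_num;                /* distinct data servers */
	const unsigned int *stripe_indices; /* stripe unit -> DS index */
	unsigned long long stripe_unit;     /* bytes per stripe unit */
};

static unsigned int bucket_count(const struct layout *l)
{
	return l->type == STRIPE_SPARSE ? l->ds_num : l->stripe_count;
}

static unsigned int choose_bucket(const struct layout *l, unsigned long long off)
{
	unsigned int j = (off / l->stripe_unit) % l->stripe_count;
	return l->type == STRIPE_SPARSE ? l->stripe_indices[j] : j;
}

int main(void)
{
	const unsigned int idx[] = { 0, 1, 0, 1 }; /* 4 stripes on 2 servers */
	struct layout l = { STRIPE_SPARSE, 4, 2, idx, 65536 };
	printf("buckets=%u, off 128k -> bucket %u\n",
	       bucket_count(&l), choose_bucket(&l, 128 * 1024));
	return 0;
}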
@@ -552,6 +670,191 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, | |||
552 | return (p_stripe == r_stripe); | 670 | return (p_stripe == r_stripe); |
553 | } | 671 | } |
554 | 672 | ||
673 | static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) | ||
674 | { | ||
675 | return !FILELAYOUT_LSEG(lseg)->commit_through_mds; | ||
676 | } | ||
677 | |||
678 | static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j) | ||
679 | { | ||
680 | if (fl->stripe_type == STRIPE_SPARSE) | ||
681 | return nfs4_fl_calc_ds_index(&fl->generic_hdr, j); | ||
682 | else | ||
683 | return j; | ||
684 | } | ||
685 | |||
686 | struct list_head *filelayout_choose_commit_list(struct nfs_page *req) | ||
687 | { | ||
688 | struct pnfs_layout_segment *lseg = req->wb_commit_lseg; | ||
689 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | ||
690 | u32 i, j; | ||
691 | struct list_head *list; | ||
692 | |||
693 | /* Note that we are calling nfs4_fl_calc_j_index on each page | ||
694 | * that ends up being committed to a data server. An attractive | ||
695 | * alternative is to add a field to nfs_write_data and nfs_page | ||
696 | * to store the value calculated in filelayout_write_pagelist | ||
697 | * and just use that here. | ||
698 | */ | ||
699 | j = nfs4_fl_calc_j_index(lseg, | ||
700 | (loff_t)req->wb_index << PAGE_CACHE_SHIFT); | ||
701 | i = select_bucket_index(fl, j); | ||
702 | list = &fl->commit_buckets[i]; | ||
703 | if (list_empty(list)) { | ||
704 | /* Non-empty buckets hold a reference on the lseg */ | ||
705 | get_lseg(lseg); | ||
706 | } | ||
707 | return list; | ||
708 | } | ||
709 | |||
710 | static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) | ||
711 | { | ||
712 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); | ||
713 | |||
714 | if (flseg->stripe_type == STRIPE_SPARSE) | ||
715 | return i; | ||
716 | else | ||
717 | return nfs4_fl_calc_ds_index(lseg, i); | ||
718 | } | ||
719 | |||
720 | static struct nfs_fh * | ||
721 | select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) | ||
722 | { | ||
723 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); | ||
724 | |||
725 | if (flseg->stripe_type == STRIPE_SPARSE) { | ||
726 | if (flseg->num_fh == 1) | ||
727 | i = 0; | ||
728 | else if (flseg->num_fh == 0) | ||
729 | /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ | ||
730 | return NULL; | ||
731 | } | ||
732 | return flseg->fh_array[i]; | ||
733 | } | ||
734 | |||
735 | static int filelayout_initiate_commit(struct nfs_write_data *data, int how) | ||
736 | { | ||
737 | struct pnfs_layout_segment *lseg = data->lseg; | ||
738 | struct nfs4_pnfs_ds *ds; | ||
739 | u32 idx; | ||
740 | struct nfs_fh *fh; | ||
741 | |||
742 | idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); | ||
743 | ds = nfs4_fl_prepare_ds(lseg, idx); | ||
744 | if (!ds) { | ||
745 | printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__); | ||
746 | set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags); | ||
747 | set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); | ||
748 | prepare_to_resend_writes(data); | ||
749 | data->mds_ops->rpc_release(data); | ||
750 | return -EAGAIN; | ||
751 | } | ||
752 | dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how); | ||
753 | data->write_done_cb = filelayout_commit_done_cb; | ||
754 | data->ds_clp = ds->ds_clp; | ||
755 | fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); | ||
756 | if (fh) | ||
757 | data->args.fh = fh; | ||
758 | return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient, | ||
759 | &filelayout_commit_call_ops, how); | ||
760 | } | ||
761 | |||
762 | /* | ||
763 | * This is only useful while we are using whole file layouts. | ||
764 | */ | ||
765 | static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode) | ||
766 | { | ||
767 | struct pnfs_layout_segment *lseg, *rv = NULL; | ||
768 | |||
769 | spin_lock(&inode->i_lock); | ||
770 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) | ||
771 | if (lseg->pls_range.iomode == IOMODE_RW) | ||
772 | rv = get_lseg(lseg); | ||
773 | spin_unlock(&inode->i_lock); | ||
774 | return rv; | ||
775 | } | ||
776 | |||
777 | static int alloc_ds_commits(struct inode *inode, struct list_head *list) | ||
778 | { | ||
779 | struct pnfs_layout_segment *lseg; | ||
780 | struct nfs4_filelayout_segment *fl; | ||
781 | struct nfs_write_data *data; | ||
782 | int i, j; | ||
783 | |||
784 | /* Won't need this when non-whole file layout segments are supported | ||
785 | * instead we will use a pnfs_layout_hdr structure */ | ||
786 | lseg = find_only_write_lseg(inode); | ||
787 | if (!lseg) | ||
788 | return 0; | ||
789 | fl = FILELAYOUT_LSEG(lseg); | ||
790 | for (i = 0; i < fl->number_of_buckets; i++) { | ||
791 | if (list_empty(&fl->commit_buckets[i])) | ||
792 | continue; | ||
793 | data = nfs_commitdata_alloc(); | ||
794 | if (!data) | ||
795 | goto out_bad; | ||
796 | data->ds_commit_index = i; | ||
797 | data->lseg = lseg; | ||
798 | list_add(&data->pages, list); | ||
799 | } | ||
800 | put_lseg(lseg); | ||
801 | return 0; | ||
802 | |||
803 | out_bad: | ||
804 | for (j = i; j < fl->number_of_buckets; j++) { | ||
805 | if (list_empty(&fl->commit_buckets[j])) | ||
806 | continue; | ||
807 | nfs_retry_commit(&fl->commit_buckets[j], lseg); | ||
808 | put_lseg(lseg); /* associated with emptying bucket */ | ||
809 | } | ||
810 | put_lseg(lseg); | ||
811 | /* Caller will clean up entries put on list */ | ||
812 | return -ENOMEM; | ||
813 | } | ||
814 | |||
815 | /* This follows nfs_commit_list pretty closely */ | ||
816 | static int | ||
817 | filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | ||
818 | int how) | ||
819 | { | ||
820 | struct nfs_write_data *data, *tmp; | ||
821 | LIST_HEAD(list); | ||
822 | |||
823 | if (!list_empty(mds_pages)) { | ||
824 | data = nfs_commitdata_alloc(); | ||
825 | if (!data) | ||
826 | goto out_bad; | ||
827 | data->lseg = NULL; | ||
828 | list_add(&data->pages, &list); | ||
829 | } | ||
830 | |||
831 | if (alloc_ds_commits(inode, &list)) | ||
832 | goto out_bad; | ||
833 | |||
834 | list_for_each_entry_safe(data, tmp, &list, pages) { | ||
835 | list_del_init(&data->pages); | ||
836 | atomic_inc(&NFS_I(inode)->commits_outstanding); | ||
837 | if (!data->lseg) { | ||
838 | nfs_init_commit(data, mds_pages, NULL); | ||
839 | nfs_initiate_commit(data, NFS_CLIENT(inode), | ||
840 | data->mds_ops, how); | ||
841 | } else { | ||
842 | nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg); | ||
843 | filelayout_initiate_commit(data, how); | ||
844 | } | ||
845 | } | ||
846 | return 0; | ||
847 | out_bad: | ||
848 | list_for_each_entry_safe(data, tmp, &list, pages) { | ||
849 | nfs_retry_commit(&data->pages, data->lseg); | ||
850 | list_del_init(&data->pages); | ||
851 | nfs_commit_free(data); | ||
852 | } | ||
853 | nfs_retry_commit(mds_pages, NULL); | ||
854 | nfs_commit_clear_lock(NFS_I(inode)); | ||
855 | return -ENOMEM; | ||
856 | } | ||
857 | |||
555 | static struct pnfs_layoutdriver_type filelayout_type = { | 858 | static struct pnfs_layoutdriver_type filelayout_type = { |
556 | .id = LAYOUT_NFSV4_1_FILES, | 859 | .id = LAYOUT_NFSV4_1_FILES, |
557 | .name = "LAYOUT_NFSV4_1_FILES", | 860 | .name = "LAYOUT_NFSV4_1_FILES", |
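Editor's note: filelayout_commit_pagelist() fans one logical COMMIT out into one RPC per destination: a struct nfs_write_data for any pages that were written through the MDS, plus one per non-empty DS bucket, each counted in commits_outstanding so the final rpc_release can clear the commit lock; on allocation failure everything already queued is pushed back for retry. A schematic of the fan-out loop, with malloc/printf standing in for nfs_commitdata_alloc() and the RPC dispatch (the sketch leaks on its error path, where the kernel re-queues and frees):

#include <stdio.h>
#include <stdlib.h>

#define NBUCKETS 3

/* One commit RPC per destination: bucket -1 means "through the MDS". */
struct commit { int bucket; };

static int commit_pagelist(int mds_pages, const int *bucket_pages)
{
	struct commit *todo[NBUCKETS + 1];
	int n = 0, outstanding = 0;

	if (mds_pages) {                      /* pages written via the MDS */
		todo[n] = malloc(sizeof(struct commit));
		if (!todo[n])
			return -1;            /* real code re-queues pages here */
		todo[n++]->bucket = -1;
	}
	for (int i = 0; i < NBUCKETS; i++) {  /* one RPC per non-empty bucket */
		if (!bucket_pages[i])
			continue;
		todo[n] = malloc(sizeof(struct commit));
		if (!todo[n])
			return -1;
		todo[n++]->bucket = i;
	}
	for (int i = 0; i < n; i++) {
		outstanding++;                /* commits_outstanding analogue */
		if (todo[i]->bucket < 0)
			printf("COMMIT -> MDS\n");
		else
			printf("COMMIT -> DS bucket %d\n", todo[i]->bucket);
		free(todo[i]);
	}
	return outstanding;
}

int main(void)
{
	int buckets[NBUCKETS] = { 4, 0, 2 };
	printf("outstanding=%d\n", commit_pagelist(1, buckets));
	return 0;
}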
@@ -559,6 +862,9 @@ static struct pnfs_layoutdriver_type filelayout_type = { | |||
559 | .alloc_lseg = filelayout_alloc_lseg, | 862 | .alloc_lseg = filelayout_alloc_lseg, |
560 | .free_lseg = filelayout_free_lseg, | 863 | .free_lseg = filelayout_free_lseg, |
561 | .pg_test = filelayout_pg_test, | 864 | .pg_test = filelayout_pg_test, |
865 | .mark_pnfs_commit = filelayout_mark_pnfs_commit, | ||
866 | .choose_commit_list = filelayout_choose_commit_list, | ||
867 | .commit_pagelist = filelayout_commit_pagelist, | ||
562 | .read_pagelist = filelayout_read_pagelist, | 868 | .read_pagelist = filelayout_read_pagelist, |
563 | .write_pagelist = filelayout_write_pagelist, | 869 | .write_pagelist = filelayout_write_pagelist, |
564 | }; | 870 | }; |
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index ee0c907742b5..085a354e0f08 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h | |||
@@ -79,6 +79,8 @@ struct nfs4_filelayout_segment { | |||
79 | struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ | 79 | struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ |
80 | unsigned int num_fh; | 80 | unsigned int num_fh; |
81 | struct nfs_fh **fh_array; | 81 | struct nfs_fh **fh_array; |
82 | struct list_head *commit_buckets; /* Sort commits to ds */ | ||
83 | int number_of_buckets; | ||
82 | }; | 84 | }; |
83 | 85 | ||
84 | static inline struct nfs4_filelayout_segment * | 86 | static inline struct nfs4_filelayout_segment * |
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 68143c162e3b..de5350f2b249 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -261,7 +261,7 @@ out: | |||
261 | * Currently only support ipv4, and one multi-path address. | 261 | * Currently only support ipv4, and one multi-path address. |
262 | */ | 262 | */ |
263 | static struct nfs4_pnfs_ds * | 263 | static struct nfs4_pnfs_ds * |
264 | decode_and_add_ds(__be32 **pp, struct inode *inode) | 264 | decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) |
265 | { | 265 | { |
266 | struct nfs4_pnfs_ds *ds = NULL; | 266 | struct nfs4_pnfs_ds *ds = NULL; |
267 | char *buf; | 267 | char *buf; |
@@ -269,25 +269,34 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) | |||
269 | u32 ip_addr, port; | 269 | u32 ip_addr, port; |
270 | int nlen, rlen, i; | 270 | int nlen, rlen, i; |
271 | int tmp[2]; | 271 | int tmp[2]; |
272 | __be32 *r_netid, *r_addr, *p = *pp; | 272 | __be32 *p; |
273 | 273 | ||
274 | /* r_netid */ | 274 | /* r_netid */ |
275 | p = xdr_inline_decode(streamp, 4); | ||
276 | if (unlikely(!p)) | ||
277 | goto out_err; | ||
275 | nlen = be32_to_cpup(p++); | 278 | nlen = be32_to_cpup(p++); |
276 | r_netid = p; | ||
277 | p += XDR_QUADLEN(nlen); | ||
278 | 279 | ||
279 | /* r_addr */ | 280 | p = xdr_inline_decode(streamp, nlen); |
280 | rlen = be32_to_cpup(p++); | 281 | if (unlikely(!p)) |
281 | r_addr = p; | 282 | goto out_err; |
282 | p += XDR_QUADLEN(rlen); | ||
283 | *pp = p; | ||
284 | 283 | ||
285 | /* Check that netid is "tcp" */ | 284 | /* Check that netid is "tcp" */ |
286 | if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { | 285 | if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { |
287 | dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); | 286 | dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); |
288 | goto out_err; | 287 | goto out_err; |
289 | } | 288 | } |
290 | 289 | ||
290 | /* r_addr */ | ||
291 | p = xdr_inline_decode(streamp, 4); | ||
292 | if (unlikely(!p)) | ||
293 | goto out_err; | ||
294 | rlen = be32_to_cpup(p); | ||
295 | |||
296 | p = xdr_inline_decode(streamp, rlen); | ||
297 | if (unlikely(!p)) | ||
298 | goto out_err; | ||
299 | |||
291 | /* ipv6 length plus port is legal */ | 300 | /* ipv6 length plus port is legal */ |
292 | if (rlen > INET6_ADDRSTRLEN + 8) { | 301 | if (rlen > INET6_ADDRSTRLEN + 8) { |
293 | dprintk("%s: Invalid address, length %d\n", __func__, | 302 | dprintk("%s: Invalid address, length %d\n", __func__, |
@@ -300,7 +309,7 @@ decode_and_add_ds(__be32 **pp, struct inode *inode) | |||
300 | goto out_err; | 309 | goto out_err; |
301 | } | 310 | } |
302 | buf[rlen] = '\0'; | 311 | buf[rlen] = '\0'; |
303 | memcpy(buf, r_addr, rlen); | 312 | memcpy(buf, p, rlen); |
304 | 313 | ||
305 | /* replace the port dots with dashes for the in4_pton() delimiter*/ | 314 | /* replace the port dots with dashes for the in4_pton() delimiter*/ |
306 | for (i = 0; i < 2; i++) { | 315 | for (i = 0; i < 2; i++) { |
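Editor's note, for context on the dot-to-dash loop in this hunk: the data-server address arrives as an ONC RPC universal address, the string "a.b.c.d.p1.p2" whose last two dot-separated decimal octets encode the TCP port. The kernel rewrites those final two dots as dashes so in4_pton() stops parsing at the host part, then reassembles port = (p1 << 8) | p2. A user-space sketch of the same split, using sscanf() instead of in4_pton():

#include <stdio.h>

/* Parse "a.b.c.d.p1.p2" (a universal address) into IP octets + port. */
static int parse_uaddr(const char *uaddr, unsigned int ip[4], unsigned int *port)
{
	unsigned int p1, p2;

	if (sscanf(uaddr, "%u.%u.%u.%u.%u.%u",
		   &ip[0], &ip[1], &ip[2], &ip[3], &p1, &p2) != 6)
		return -1;
	if (p1 > 255 || p2 > 255)
		return -1;
	*port = (p1 << 8) | p2;          /* the port is split across two octets */
	return 0;
}

int main(void)
{
	unsigned int ip[4], port;

	if (parse_uaddr("192.168.1.20.8.1", ip, &port) == 0)
		printf("%u.%u.%u.%u:%u\n", ip[0], ip[1], ip[2], ip[3], port);
	return 0;                        /* prints ...:2049, the NFS port */
}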
@@ -336,90 +345,154 @@ out_err: | |||
336 | static struct nfs4_file_layout_dsaddr* | 345 | static struct nfs4_file_layout_dsaddr* |
337 | decode_device(struct inode *ino, struct pnfs_device *pdev) | 346 | decode_device(struct inode *ino, struct pnfs_device *pdev) |
338 | { | 347 | { |
339 | int i, dummy; | 348 | int i; |
340 | u32 cnt, num; | 349 | u32 cnt, num; |
341 | u8 *indexp; | 350 | u8 *indexp; |
342 | __be32 *p = (__be32 *)pdev->area, *indicesp; | 351 | __be32 *p; |
343 | struct nfs4_file_layout_dsaddr *dsaddr; | 352 | u8 *stripe_indices; |
353 | u8 max_stripe_index; | ||
354 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | ||
355 | struct xdr_stream stream; | ||
356 | struct xdr_buf buf = { | ||
357 | .pages = pdev->pages, | ||
358 | .page_len = pdev->pglen, | ||
359 | .buflen = pdev->pglen, | ||
360 | .len = pdev->pglen, | ||
361 | }; | ||
362 | struct page *scratch; | ||
363 | |||
364 | /* set up xdr stream */ | ||
365 | scratch = alloc_page(GFP_KERNEL); | ||
366 | if (!scratch) | ||
367 | goto out_err; | ||
368 | |||
369 | xdr_init_decode(&stream, &buf, NULL); | ||
370 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
344 | 371 | ||
345 | /* Get the stripe count (number of stripe index) */ | 372 | /* Get the stripe count (number of stripe index) */ |
346 | cnt = be32_to_cpup(p++); | 373 | p = xdr_inline_decode(&stream, 4); |
374 | if (unlikely(!p)) | ||
375 | goto out_err_free_scratch; | ||
376 | |||
377 | cnt = be32_to_cpup(p); | ||
347 | dprintk("%s stripe count %d\n", __func__, cnt); | 378 | dprintk("%s stripe count %d\n", __func__, cnt); |
348 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { | 379 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { |
349 | printk(KERN_WARNING "%s: stripe count %d greater than " | 380 | printk(KERN_WARNING "%s: stripe count %d greater than " |
350 | "supported maximum %d\n", __func__, | 381 | "supported maximum %d\n", __func__, |
351 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); | 382 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); |
352 | goto out_err; | 383 | goto out_err_free_scratch; |
384 | } | ||
385 | |||
386 | /* read stripe indices */ | ||
387 | stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL); | ||
388 | if (!stripe_indices) | ||
389 | goto out_err_free_scratch; | ||
390 | |||
391 | p = xdr_inline_decode(&stream, cnt << 2); | ||
392 | if (unlikely(!p)) | ||
393 | goto out_err_free_stripe_indices; | ||
394 | |||
395 | indexp = &stripe_indices[0]; | ||
396 | max_stripe_index = 0; | ||
397 | for (i = 0; i < cnt; i++) { | ||
398 | *indexp = be32_to_cpup(p++); | ||
399 | max_stripe_index = max(max_stripe_index, *indexp); | ||
400 | indexp++; | ||
353 | } | 401 | } |
354 | 402 | ||
355 | /* Check the multipath list count */ | 403 | /* Check the multipath list count */ |
356 | indicesp = p; | 404 | p = xdr_inline_decode(&stream, 4); |
357 | p += XDR_QUADLEN(cnt << 2); | 405 | if (unlikely(!p)) |
358 | num = be32_to_cpup(p++); | 406 | goto out_err_free_stripe_indices; |
407 | |||
408 | num = be32_to_cpup(p); | ||
359 | dprintk("%s ds_num %u\n", __func__, num); | 409 | dprintk("%s ds_num %u\n", __func__, num); |
360 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { | 410 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { |
361 | printk(KERN_WARNING "%s: multipath count %d greater than " | 411 | printk(KERN_WARNING "%s: multipath count %d greater than " |
362 | "supported maximum %d\n", __func__, | 412 | "supported maximum %d\n", __func__, |
363 | num, NFS4_PNFS_MAX_MULTI_CNT); | 413 | num, NFS4_PNFS_MAX_MULTI_CNT); |
364 | goto out_err; | 414 | goto out_err_free_stripe_indices; |
365 | } | 415 | } |
416 | |||
417 | /* validate stripe indices are all < num */ | ||
418 | if (max_stripe_index >= num) { | ||
419 | printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n", | ||
420 | __func__, max_stripe_index, num); | ||
421 | goto out_err_free_stripe_indices; | ||
422 | } | ||
423 | |||
366 | dsaddr = kzalloc(sizeof(*dsaddr) + | 424 | dsaddr = kzalloc(sizeof(*dsaddr) + |
367 | (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), | 425 | (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), |
368 | GFP_KERNEL); | 426 | GFP_KERNEL); |
369 | if (!dsaddr) | 427 | if (!dsaddr) |
370 | goto out_err; | 428 | goto out_err_free_stripe_indices; |
371 | |||
372 | dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL); | ||
373 | if (!dsaddr->stripe_indices) | ||
374 | goto out_err_free; | ||
375 | 429 | ||
376 | dsaddr->stripe_count = cnt; | 430 | dsaddr->stripe_count = cnt; |
431 | dsaddr->stripe_indices = stripe_indices; | ||
432 | stripe_indices = NULL; | ||
377 | dsaddr->ds_num = num; | 433 | dsaddr->ds_num = num; |
378 | 434 | ||
379 | memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); | 435 | memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); |
380 | 436 | ||
381 | /* Go back and read stripe indices */ | ||
382 | p = indicesp; | ||
383 | indexp = &dsaddr->stripe_indices[0]; | ||
384 | for (i = 0; i < dsaddr->stripe_count; i++) { | ||
385 | *indexp = be32_to_cpup(p++); | ||
386 | if (*indexp >= num) | ||
387 | goto out_err_free; | ||
388 | indexp++; | ||
389 | } | ||
390 | /* Skip already read multipath list count */ | ||
391 | p++; | ||
392 | |||
393 | for (i = 0; i < dsaddr->ds_num; i++) { | 437 | for (i = 0; i < dsaddr->ds_num; i++) { |
394 | int j; | 438 | int j; |
439 | u32 mp_count; | ||
440 | |||
441 | p = xdr_inline_decode(&stream, 4); | ||
442 | if (unlikely(!p)) | ||
443 | goto out_err_free_deviceid; | ||
395 | 444 | ||
396 | dummy = be32_to_cpup(p++); /* multipath count */ | 445 | mp_count = be32_to_cpup(p); /* multipath count */ |
397 | if (dummy > 1) { | 446 | if (mp_count > 1) { |
398 | printk(KERN_WARNING | 447 | printk(KERN_WARNING |
399 | "%s: Multipath count %d not supported, " | 448 | "%s: Multipath count %d not supported, " |
400 | "skipping all greater than 1\n", __func__, | 449 | "skipping all greater than 1\n", __func__, |
401 | dummy); | 450 | mp_count); |
402 | } | 451 | } |
403 | for (j = 0; j < dummy; j++) { | 452 | for (j = 0; j < mp_count; j++) { |
404 | if (j == 0) { | 453 | if (j == 0) { |
405 | dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); | 454 | dsaddr->ds_list[i] = decode_and_add_ds(&stream, |
455 | ino); | ||
406 | if (dsaddr->ds_list[i] == NULL) | 456 | if (dsaddr->ds_list[i] == NULL) |
407 | goto out_err_free; | 457 | goto out_err_free_deviceid; |
408 | } else { | 458 | } else { |
409 | u32 len; | 459 | u32 len; |
410 | /* skip extra multipath */ | 460 | /* skip extra multipath */ |
411 | len = be32_to_cpup(p++); | 461 | |
412 | p += XDR_QUADLEN(len); | 462 | /* read len, skip */ |
413 | len = be32_to_cpup(p++); | 463 | p = xdr_inline_decode(&stream, 4); |
414 | p += XDR_QUADLEN(len); | 464 | if (unlikely(!p)) |
415 | continue; | 465 | goto out_err_free_deviceid; |
466 | len = be32_to_cpup(p); | ||
467 | |||
468 | p = xdr_inline_decode(&stream, len); | ||
469 | if (unlikely(!p)) | ||
470 | goto out_err_free_deviceid; | ||
471 | |||
472 | /* read len, skip */ | ||
473 | p = xdr_inline_decode(&stream, 4); | ||
474 | if (unlikely(!p)) | ||
475 | goto out_err_free_deviceid; | ||
476 | len = be32_to_cpup(p); | ||
477 | |||
478 | p = xdr_inline_decode(&stream, len); | ||
479 | if (unlikely(!p)) | ||
480 | goto out_err_free_deviceid; | ||
416 | } | 481 | } |
417 | } | 482 | } |
418 | } | 483 | } |
484 | |||
485 | __free_page(scratch); | ||
419 | return dsaddr; | 486 | return dsaddr; |
420 | 487 | ||
421 | out_err_free: | 488 | out_err_free_deviceid: |
422 | nfs4_fl_free_deviceid(dsaddr); | 489 | nfs4_fl_free_deviceid(dsaddr); |
490 | /* stripe_indices was part of dsaddr */ | ||
491 | goto out_err_free_scratch; | ||
492 | out_err_free_stripe_indices: | ||
493 | kfree(stripe_indices); | ||
494 | out_err_free_scratch: | ||
495 | __free_page(scratch); | ||
423 | out_err: | 496 | out_err: |
424 | dprintk("%s ERROR: returning NULL\n", __func__); | 497 | dprintk("%s ERROR: returning NULL\n", __func__); |
425 | return NULL; | 498 | return NULL; |
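Editor's note on the validation order the rewrite enforces in decode_device(): the stripe indices are decoded first and their maximum tracked on the fly, then checked against the multipath count in one shot, so a corrupt or hostile GETDEVICEINFO reply can no longer index past ds_list[]. A boiled-down sketch of that check, assuming the indices have already been pulled out of the XDR stream into a flat array:

#include <stdio.h>

/* Reject any stripe index that would address past ds_list[ds_num]. */
static int validate_stripe_indices(const unsigned int *idx, unsigned int cnt,
				   unsigned int ds_num)
{
	unsigned int max = 0;

	for (unsigned int i = 0; i < cnt; i++)
		if (idx[i] > max)
			max = idx[i];
	if (max >= ds_num) {
		fprintf(stderr, "stripe index %u >= num ds %u\n", max, ds_num);
		return -1;
	}
	return 0;
}

int main(void)
{
	unsigned int idx[] = { 0, 1, 2, 1 };

	printf("ok=%d\n", validate_stripe_indices(idx, 4, 3) == 0);
	return 0;
}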
@@ -498,11 +571,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) | |||
498 | goto out_free; | 571 | goto out_free; |
499 | } | 572 | } |
500 | 573 | ||
501 | /* set pdev->area */ | ||
502 | pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL); | ||
503 | if (!pdev->area) | ||
504 | goto out_free; | ||
505 | |||
506 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | 574 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); |
507 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; | 575 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; |
508 | pdev->pages = pages; | 576 | pdev->pages = pages; |
@@ -521,8 +589,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) | |||
521 | */ | 589 | */ |
522 | dsaddr = decode_and_add_device(inode, pdev); | 590 | dsaddr = decode_and_add_device(inode, pdev); |
523 | out_free: | 591 | out_free: |
524 | if (pdev->area != NULL) | ||
525 | vunmap(pdev->area); | ||
526 | for (i = 0; i < max_pages; i++) | 592 | for (i = 0; i < max_pages; i++) |
527 | __free_page(pages[i]); | 593 | __free_page(pages[i]); |
528 | kfree(pages); | 594 | kfree(pages); |
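Editor's note: the vmap()/vunmap() removal falls directly out of the xdr_stream conversion. The decoder now walks the reply's page array in place, borrowing the preallocated scratch page only when an item straddles a page boundary, so the reply pages never need a contiguous kernel mapping. A sketch of reading across a page gap with a scratch copy, using toy 8-byte pages:

#include <stdio.h>
#include <string.h>

#define PAGE_SZ 8   /* tiny pages to force a straddle */

struct pgstream {
	char *pages[2];
	int page, off;
	char scratch[16];
};

/* Return a contiguous pointer to n bytes, copying into scratch
 * only when the run crosses a page boundary. */
static const char *pg_decode(struct pgstream *s, int n)
{
	if (s->off + n <= PAGE_SZ) {             /* fits in the current page */
		const char *p = s->pages[s->page] + s->off;
		s->off += n;
		return p;
	}
	int first = PAGE_SZ - s->off;            /* straddles: stitch in scratch */
	memcpy(s->scratch, s->pages[s->page] + s->off, first);
	memcpy(s->scratch + first, s->pages[s->page + 1], n - first);
	s->page++;
	s->off = n - first;
	return s->scratch;
}

int main(void)
{
	char p0[PAGE_SZ] = "abcdefgh", p1[PAGE_SZ] = "ijklmnop";
	struct pgstream s = { { p0, p1 }, 0, 6, "" };

	printf("%.4s\n", pg_decode(&s, 4));      /* "ghij" via scratch */
	return 0;
}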
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 1d84e7088af9..dfd1e6d7e6c3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <linux/string.h> | 41 | #include <linux/string.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/sunrpc/clnt.h> | 43 | #include <linux/sunrpc/clnt.h> |
44 | #include <linux/sunrpc/gss_api.h> | ||
44 | #include <linux/nfs.h> | 45 | #include <linux/nfs.h> |
45 | #include <linux/nfs4.h> | 46 | #include <linux/nfs4.h> |
46 | #include <linux/nfs_fs.h> | 47 | #include <linux/nfs_fs.h> |
@@ -71,7 +72,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data); | |||
71 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); | 72 | static int _nfs4_recover_proc_open(struct nfs4_opendata *data); |
72 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); | 73 | static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); |
73 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); | 74 | static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); |
74 | static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); | 75 | static int _nfs4_proc_lookup(struct rpc_clnt *client, struct inode *dir, |
76 | const struct qstr *name, struct nfs_fh *fhandle, | ||
77 | struct nfs_fattr *fattr); | ||
75 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); | 78 | static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); |
76 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | 79 | static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, |
77 | struct nfs_fattr *fattr, struct iattr *sattr, | 80 | struct nfs_fattr *fattr, struct iattr *sattr, |
@@ -85,6 +88,8 @@ static int nfs4_map_errors(int err) | |||
85 | switch (err) { | 88 | switch (err) { |
86 | case -NFS4ERR_RESOURCE: | 89 | case -NFS4ERR_RESOURCE: |
87 | return -EREMOTEIO; | 90 | return -EREMOTEIO; |
91 | case -NFS4ERR_WRONGSEC: | ||
92 | return -EPERM; | ||
88 | case -NFS4ERR_BADOWNER: | 93 | case -NFS4ERR_BADOWNER: |
89 | case -NFS4ERR_BADNAME: | 94 | case -NFS4ERR_BADNAME: |
90 | return -EINVAL; | 95 | return -EINVAL; |
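Editor's note: this two-line hunk is the hinge for the whole secinfo series. By collapsing NFS4ERR_WRONGSEC into -EPERM, the generic paths (nfs_d_automount(), nfs4_proc_lookup()) get a single errno to key their flavor renegotiation off. A trivial sketch of the mapping (NFS4ERR_WRONGSEC is 10016 on the wire):

#include <errno.h>
#include <stdio.h>

enum { NFS4ERR_WRONGSEC = 10016 };

/* Collapse the NFSv4 wire error into a local errno the flavor-
 * negotiation path can key off: -EPERM means "renegotiate security". */
static int map_errors(int err)
{
	switch (err) {
	case -NFS4ERR_WRONGSEC:
		return -EPERM;
	default:
		return err;
	}
}

int main(void)
{
	printf("%d -> %d\n", -NFS4ERR_WRONGSEC, map_errors(-NFS4ERR_WRONGSEC));
	return 0;
}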
@@ -657,7 +662,8 @@ struct rpc_call_ops nfs41_call_priv_sync_ops = { | |||
657 | .rpc_call_done = nfs41_call_sync_done, | 662 | .rpc_call_done = nfs41_call_sync_done, |
658 | }; | 663 | }; |
659 | 664 | ||
660 | static int nfs4_call_sync_sequence(struct nfs_server *server, | 665 | static int nfs4_call_sync_sequence(struct rpc_clnt *clnt, |
666 | struct nfs_server *server, | ||
661 | struct rpc_message *msg, | 667 | struct rpc_message *msg, |
662 | struct nfs4_sequence_args *args, | 668 | struct nfs4_sequence_args *args, |
663 | struct nfs4_sequence_res *res, | 669 | struct nfs4_sequence_res *res, |
@@ -673,7 +679,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server, | |||
673 | .cache_reply = cache_reply, | 679 | .cache_reply = cache_reply, |
674 | }; | 680 | }; |
675 | struct rpc_task_setup task_setup = { | 681 | struct rpc_task_setup task_setup = { |
676 | .rpc_client = server->client, | 682 | .rpc_client = clnt, |
677 | .rpc_message = msg, | 683 | .rpc_message = msg, |
678 | .callback_ops = &nfs41_call_sync_ops, | 684 | .callback_ops = &nfs41_call_sync_ops, |
679 | .callback_data = &data | 685 | .callback_data = &data |
@@ -692,13 +698,14 @@ static int nfs4_call_sync_sequence(struct nfs_server *server, | |||
692 | return ret; | 698 | return ret; |
693 | } | 699 | } |
694 | 700 | ||
695 | int _nfs4_call_sync_session(struct nfs_server *server, | 701 | int _nfs4_call_sync_session(struct rpc_clnt *clnt, |
702 | struct nfs_server *server, | ||
696 | struct rpc_message *msg, | 703 | struct rpc_message *msg, |
697 | struct nfs4_sequence_args *args, | 704 | struct nfs4_sequence_args *args, |
698 | struct nfs4_sequence_res *res, | 705 | struct nfs4_sequence_res *res, |
699 | int cache_reply) | 706 | int cache_reply) |
700 | { | 707 | { |
701 | return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0); | 708 | return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0); |
702 | } | 709 | } |
703 | 710 | ||
704 | #else | 711 | #else |
@@ -709,19 +716,28 @@ static int nfs4_sequence_done(struct rpc_task *task, | |||
709 | } | 716 | } |
710 | #endif /* CONFIG_NFS_V4_1 */ | 717 | #endif /* CONFIG_NFS_V4_1 */ |
711 | 718 | ||
712 | int _nfs4_call_sync(struct nfs_server *server, | 719 | int _nfs4_call_sync(struct rpc_clnt *clnt, |
720 | struct nfs_server *server, | ||
713 | struct rpc_message *msg, | 721 | struct rpc_message *msg, |
714 | struct nfs4_sequence_args *args, | 722 | struct nfs4_sequence_args *args, |
715 | struct nfs4_sequence_res *res, | 723 | struct nfs4_sequence_res *res, |
716 | int cache_reply) | 724 | int cache_reply) |
717 | { | 725 | { |
718 | args->sa_session = res->sr_session = NULL; | 726 | args->sa_session = res->sr_session = NULL; |
719 | return rpc_call_sync(server->client, msg, 0); | 727 | return rpc_call_sync(clnt, msg, 0); |
720 | } | 728 | } |
721 | 729 | ||
722 | #define nfs4_call_sync(server, msg, args, res, cache_reply) \ | 730 | static inline |
723 | (server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \ | 731 | int nfs4_call_sync(struct rpc_clnt *clnt, |
724 | &(res)->seq_res, (cache_reply)) | 732 | struct nfs_server *server, |
733 | struct rpc_message *msg, | ||
734 | struct nfs4_sequence_args *args, | ||
735 | struct nfs4_sequence_res *res, | ||
736 | int cache_reply) | ||
737 | { | ||
738 | return server->nfs_client->cl_mvops->call_sync(clnt, server, msg, | ||
739 | args, res, cache_reply); | ||
740 | } | ||
725 | 741 | ||
726 | static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) | 742 | static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) |
727 | { | 743 | { |
@@ -1831,7 +1847,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, | |||
1831 | } else | 1847 | } else |
1832 | memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); | 1848 | memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); |
1833 | 1849 | ||
1834 | status = nfs4_call_sync(server, &msg, &arg, &res, 1); | 1850 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
1835 | if (status == 0 && state != NULL) | 1851 | if (status == 0 && state != NULL) |
1836 | renew_lease(server, timestamp); | 1852 | renew_lease(server, timestamp); |
1837 | return status; | 1853 | return status; |
@@ -2090,7 +2106,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f | |||
2090 | }; | 2106 | }; |
2091 | int status; | 2107 | int status; |
2092 | 2108 | ||
2093 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | 2109 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
2094 | if (status == 0) { | 2110 | if (status == 0) { |
2095 | memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); | 2111 | memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); |
2096 | server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| | 2112 | server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| |
@@ -2160,7 +2176,7 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2160 | }; | 2176 | }; |
2161 | 2177 | ||
2162 | nfs_fattr_init(info->fattr); | 2178 | nfs_fattr_init(info->fattr); |
2163 | return nfs4_call_sync(server, &msg, &args, &res, 0); | 2179 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
2164 | } | 2180 | } |
2165 | 2181 | ||
2166 | static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, | 2182 | static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, |
@@ -2176,15 +2192,43 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2176 | return err; | 2192 | return err; |
2177 | } | 2193 | } |
2178 | 2194 | ||
2195 | static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, | ||
2196 | struct nfs_fsinfo *info, rpc_authflavor_t flavor) | ||
2197 | { | ||
2198 | struct rpc_auth *auth; | ||
2199 | int ret; | ||
2200 | |||
2201 | auth = rpcauth_create(flavor, server->client); | ||
2202 | if (!auth) { | ||
2203 | ret = -EIO; | ||
2204 | goto out; | ||
2205 | } | ||
2206 | ret = nfs4_lookup_root(server, fhandle, info); | ||
2207 | if (ret < 0) | ||
2208 | ret = -EAGAIN; | ||
2209 | out: | ||
2210 | return ret; | ||
2211 | } | ||
2212 | |||
2179 | /* | 2213 | /* |
2180 | * get the file handle for the "/" directory on the server | 2214 | * get the file handle for the "/" directory on the server |
2181 | */ | 2215 | */ |
2182 | static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | 2216 | static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, |
2183 | struct nfs_fsinfo *info) | 2217 | struct nfs_fsinfo *info) |
2184 | { | 2218 | { |
2185 | int status; | 2219 | int i, len, status = 0; |
2220 | rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS + 2]; | ||
2221 | |||
2222 | flav_array[0] = RPC_AUTH_UNIX; | ||
2223 | len = gss_mech_list_pseudoflavors(&flav_array[1]); | ||
2224 | flav_array[1+len] = RPC_AUTH_NULL; | ||
2225 | len += 2; | ||
2186 | 2226 | ||
2187 | status = nfs4_lookup_root(server, fhandle, info); | 2227 | for (i = 0; i < len; i++) { |
2228 | status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); | ||
2229 | if (status == 0) | ||
2230 | break; | ||
2231 | } | ||
2188 | if (status == 0) | 2232 | if (status == 0) |
2189 | status = nfs4_server_capabilities(server, fhandle); | 2233 | status = nfs4_server_capabilities(server, fhandle); |
2190 | if (status == 0) | 2234 | if (status == 0) |
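Editor's note: mounting the pseudo-fs root has no parent directory to run SECINFO against, so nfs4_proc_get_root() simply probes: AUTH_UNIX first (the cheap common case), then every GSS pseudoflavor the kernel has registered, then AUTH_NULL, keeping the first flavor whose root lookup succeeds. A compact sketch of the probe order, with gss_list()/lookup_root() as hypothetical stand-ins for gss_mech_list_pseudoflavors() and nfs4_lookup_root_sec():

#include <errno.h>
#include <stdio.h>

enum { RPC_AUTH_NULL = 0, RPC_AUTH_UNIX = 1, NFS_MAX_SECFLAVORS = 8 };

/* Hypothetical stand-ins: two registered GSS pseudoflavors, and a
 * server that only accepts the second one. */
static int gss_list(unsigned int *out) { out[0] = 390003; out[1] = 390004; return 2; }
static int lookup_root(unsigned int flavor) { return flavor == 390004 ? 0 : -EACCES; }

int main(void)
{
	unsigned int flav[NFS_MAX_SECFLAVORS + 2];
	int i, len, status = -EACCES;

	flav[0] = RPC_AUTH_UNIX;             /* cheapest, try first */
	len = gss_list(&flav[1]);            /* then every GSS pseudoflavor */
	flav[1 + len] = RPC_AUTH_NULL;       /* last resort */
	len += 2;

	for (i = 0; i < len; i++) {
		status = lookup_root(flav[i]);
		if (status == 0)
			break;
	}
	if (status == 0)
		printf("root mounted with flavor %u\n", flav[i]);
	return status ? 1 : 0;
}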
@@ -2249,7 +2293,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2249 | }; | 2293 | }; |
2250 | 2294 | ||
2251 | nfs_fattr_init(fattr); | 2295 | nfs_fattr_init(fattr); |
2252 | return nfs4_call_sync(server, &msg, &args, &res, 0); | 2296 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
2253 | } | 2297 | } |
2254 | 2298 | ||
2255 | static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 2299 | static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) |
@@ -2309,9 +2353,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
2309 | return status; | 2353 | return status; |
2310 | } | 2354 | } |
2311 | 2355 | ||
2312 | static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *dirfh, | 2356 | static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server, |
2313 | const struct qstr *name, struct nfs_fh *fhandle, | 2357 | const struct nfs_fh *dirfh, const struct qstr *name, |
2314 | struct nfs_fattr *fattr) | 2358 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) |
2315 | { | 2359 | { |
2316 | int status; | 2360 | int status; |
2317 | struct nfs4_lookup_arg args = { | 2361 | struct nfs4_lookup_arg args = { |
@@ -2333,7 +2377,7 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *d | |||
2333 | nfs_fattr_init(fattr); | 2377 | nfs_fattr_init(fattr); |
2334 | 2378 | ||
2335 | dprintk("NFS call lookupfh %s\n", name->name); | 2379 | dprintk("NFS call lookupfh %s\n", name->name); |
2336 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | 2380 | status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0); |
2337 | dprintk("NFS reply lookupfh: %d\n", status); | 2381 | dprintk("NFS reply lookupfh: %d\n", status); |
2338 | return status; | 2382 | return status; |
2339 | } | 2383 | } |
@@ -2345,7 +2389,7 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, | |||
2345 | struct nfs4_exception exception = { }; | 2389 | struct nfs4_exception exception = { }; |
2346 | int err; | 2390 | int err; |
2347 | do { | 2391 | do { |
2348 | err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr); | 2392 | err = _nfs4_proc_lookupfh(server->client, server, dirfh, name, fhandle, fattr); |
2349 | /* FIXME: !!!! */ | 2393 | /* FIXME: !!!! */ |
2350 | if (err == -NFS4ERR_MOVED) { | 2394 | if (err == -NFS4ERR_MOVED) { |
2351 | err = -EREMOTE; | 2395 | err = -EREMOTE; |
@@ -2356,27 +2400,41 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh, | |||
2356 | return err; | 2400 | return err; |
2357 | } | 2401 | } |
2358 | 2402 | ||
2359 | static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, | 2403 | static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, |
2360 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 2404 | const struct qstr *name, struct nfs_fh *fhandle, |
2405 | struct nfs_fattr *fattr) | ||
2361 | { | 2406 | { |
2362 | int status; | 2407 | int status; |
2363 | 2408 | ||
2364 | dprintk("NFS call lookup %s\n", name->name); | 2409 | dprintk("NFS call lookup %s\n", name->name); |
2365 | status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); | 2410 | status = _nfs4_proc_lookupfh(clnt, NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); |
2366 | if (status == -NFS4ERR_MOVED) | 2411 | if (status == -NFS4ERR_MOVED) |
2367 | status = nfs4_get_referral(dir, name, fattr, fhandle); | 2412 | status = nfs4_get_referral(dir, name, fattr, fhandle); |
2368 | dprintk("NFS reply lookup: %d\n", status); | 2413 | dprintk("NFS reply lookup: %d\n", status); |
2369 | return status; | 2414 | return status; |
2370 | } | 2415 | } |
2371 | 2416 | ||
2372 | static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 2417 | void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr, struct nfs_fh *fh) |
2418 | { | ||
2419 | memset(fh, 0, sizeof(struct nfs_fh)); | ||
2420 | fattr->fsid.major = 1; | ||
2421 | fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | | ||
2422 | NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_FSID | NFS_ATTR_FATTR_MOUNTPOINT; | ||
2423 | fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; | ||
2424 | fattr->nlink = 2; | ||
2425 | } | ||
2426 | |||
2427 | static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, | ||
2428 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | ||
2373 | { | 2429 | { |
2374 | struct nfs4_exception exception = { }; | 2430 | struct nfs4_exception exception = { }; |
2375 | int err; | 2431 | int err; |
2376 | do { | 2432 | do { |
2377 | err = nfs4_handle_exception(NFS_SERVER(dir), | 2433 | err = nfs4_handle_exception(NFS_SERVER(dir), |
2378 | _nfs4_proc_lookup(dir, name, fhandle, fattr), | 2434 | _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr), |
2379 | &exception); | 2435 | &exception); |
2436 | if (err == -EPERM) | ||
2437 | nfs_fixup_secinfo_attributes(fattr, fhandle); | ||
2380 | } while (exception.retry); | 2438 | } while (exception.retry); |
2381 | return err; | 2439 | return err; |
2382 | } | 2440 | } |
@@ -2421,7 +2479,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry | |||
2421 | if (res.fattr == NULL) | 2479 | if (res.fattr == NULL) |
2422 | return -ENOMEM; | 2480 | return -ENOMEM; |
2423 | 2481 | ||
2424 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | 2482 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
2425 | if (!status) { | 2483 | if (!status) { |
2426 | entry->mask = 0; | 2484 | entry->mask = 0; |
2427 | if (res.access & NFS4_ACCESS_READ) | 2485 | if (res.access & NFS4_ACCESS_READ) |
@@ -2488,7 +2546,7 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page, | |||
2488 | .rpc_resp = &res, | 2546 | .rpc_resp = &res, |
2489 | }; | 2547 | }; |
2490 | 2548 | ||
2491 | return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); | 2549 | return nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); |
2492 | } | 2550 | } |
2493 | 2551 | ||
2494 | static int nfs4_proc_readlink(struct inode *inode, struct page *page, | 2552 | static int nfs4_proc_readlink(struct inode *inode, struct page *page, |
@@ -2577,7 +2635,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) | |||
2577 | if (res.dir_attr == NULL) | 2635 | if (res.dir_attr == NULL) |
2578 | goto out; | 2636 | goto out; |
2579 | 2637 | ||
2580 | status = nfs4_call_sync(server, &msg, &args, &res, 1); | 2638 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1); |
2581 | if (status == 0) { | 2639 | if (status == 0) { |
2582 | update_changeattr(dir, &res.cinfo); | 2640 | update_changeattr(dir, &res.cinfo); |
2583 | nfs_post_op_update_inode(dir, res.dir_attr); | 2641 | nfs_post_op_update_inode(dir, res.dir_attr); |
@@ -2678,7 +2736,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, | |||
2678 | if (res.old_fattr == NULL || res.new_fattr == NULL) | 2736 | if (res.old_fattr == NULL || res.new_fattr == NULL) |
2679 | goto out; | 2737 | goto out; |
2680 | 2738 | ||
2681 | status = nfs4_call_sync(server, &msg, &arg, &res, 1); | 2739 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
2682 | if (!status) { | 2740 | if (!status) { |
2683 | update_changeattr(old_dir, &res.old_cinfo); | 2741 | update_changeattr(old_dir, &res.old_cinfo); |
2684 | nfs_post_op_update_inode(old_dir, res.old_fattr); | 2742 | nfs_post_op_update_inode(old_dir, res.old_fattr); |
@@ -2729,7 +2787,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr * | |||
2729 | if (res.fattr == NULL || res.dir_attr == NULL) | 2787 | if (res.fattr == NULL || res.dir_attr == NULL) |
2730 | goto out; | 2788 | goto out; |
2731 | 2789 | ||
2732 | status = nfs4_call_sync(server, &msg, &arg, &res, 1); | 2790 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
2733 | if (!status) { | 2791 | if (!status) { |
2734 | update_changeattr(dir, &res.cinfo); | 2792 | update_changeattr(dir, &res.cinfo); |
2735 | nfs_post_op_update_inode(dir, res.dir_attr); | 2793 | nfs_post_op_update_inode(dir, res.dir_attr); |
@@ -2792,8 +2850,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir, | |||
2792 | 2850 | ||
2793 | static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) | 2851 | static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) |
2794 | { | 2852 | { |
2795 | int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg, | 2853 | int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg, |
2796 | &data->arg, &data->res, 1); | 2854 | &data->arg.seq_args, &data->res.seq_res, 1); |
2797 | if (status == 0) { | 2855 | if (status == 0) { |
2798 | update_changeattr(dir, &data->res.dir_cinfo); | 2856 | update_changeattr(dir, &data->res.dir_cinfo); |
2799 | nfs_post_op_update_inode(dir, data->res.dir_fattr); | 2857 | nfs_post_op_update_inode(dir, data->res.dir_fattr); |
@@ -2905,7 +2963,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
2905 | (unsigned long long)cookie); | 2963 | (unsigned long long)cookie); |
2906 | nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); | 2964 | nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); |
2907 | res.pgbase = args.pgbase; | 2965 | res.pgbase = args.pgbase; |
2908 | status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); | 2966 | status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); |
2909 | if (status >= 0) { | 2967 | if (status >= 0) { |
2910 | memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); | 2968 | memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); |
2911 | status += args.pgbase; | 2969 | status += args.pgbase; |
@@ -2997,7 +3055,7 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, | |||
2997 | }; | 3055 | }; |
2998 | 3056 | ||
2999 | nfs_fattr_init(fsstat->fattr); | 3057 | nfs_fattr_init(fsstat->fattr); |
3000 | return nfs4_call_sync(server, &msg, &args, &res, 0); | 3058 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
3001 | } | 3059 | } |
3002 | 3060 | ||
3003 | static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) | 3061 | static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) |
@@ -3028,7 +3086,7 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | |||
3028 | .rpc_resp = &res, | 3086 | .rpc_resp = &res, |
3029 | }; | 3087 | }; |
3030 | 3088 | ||
3031 | return nfs4_call_sync(server, &msg, &args, &res, 0); | 3089 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
3032 | } | 3090 | } |
3033 | 3091 | ||
3034 | static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) | 3092 | static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) |
@@ -3073,7 +3131,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle | |||
3073 | } | 3131 | } |
3074 | 3132 | ||
3075 | nfs_fattr_init(pathconf->fattr); | 3133 | nfs_fattr_init(pathconf->fattr); |
3076 | return nfs4_call_sync(server, &msg, &args, &res, 0); | 3134 | return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
3077 | } | 3135 | } |
3078 | 3136 | ||
3079 | static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | 3137 | static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, |
@@ -3195,12 +3253,9 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag | |||
3195 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; | 3253 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; |
3196 | } | 3254 | } |
3197 | 3255 | ||
3198 | static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) | 3256 | static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data) |
3199 | { | 3257 | { |
3200 | struct inode *inode = data->inode; | 3258 | struct inode *inode = data->inode; |
3201 | |||
3202 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | ||
3203 | return -EAGAIN; | ||
3204 | 3259 | ||
3205 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { | 3260 | if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { |
3206 | nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); | 3261 | nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); |
@@ -3210,11 +3265,24 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) | |||
3210 | return 0; | 3265 | return 0; |
3211 | } | 3266 | } |
3212 | 3267 | ||
3268 | static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) | ||
3269 | { | ||
3270 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | ||
3271 | return -EAGAIN; | ||
3272 | return data->write_done_cb(task, data); | ||
3273 | } | ||
3274 | |||
3213 | static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) | 3275 | static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) |
3214 | { | 3276 | { |
3215 | struct nfs_server *server = NFS_SERVER(data->inode); | 3277 | struct nfs_server *server = NFS_SERVER(data->inode); |
3216 | 3278 | ||
3217 | data->args.bitmask = server->cache_consistency_bitmask; | 3279 | if (data->lseg) { |
3280 | data->args.bitmask = NULL; | ||
3281 | data->res.fattr = NULL; | ||
3282 | } else | ||
3283 | data->args.bitmask = server->cache_consistency_bitmask; | ||
3284 | if (!data->write_done_cb) | ||
3285 | data->write_done_cb = nfs4_commit_done_cb; | ||
3218 | data->res.server = server; | 3286 | data->res.server = server; |
3219 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; | 3287 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; |
3220 | } | 3288 | } |
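The hunk above splits the old nfs4_commit_done in two: the generic wrapper keeps the session-sequence check, then delegates the rest through data->write_done_cb so a pNFS transport can substitute its own completion handler while commit_setup installs nfs4_commit_done_cb as the default. A minimal userspace sketch of that dispatch shape follows; every type and name in it is an illustrative stand-in, not kernel API.

#include <stdio.h>

struct task { int status; };
struct wdata {
	int (*done_cb)(struct task *, struct wdata *); /* per-transport hook */
};

static int default_done_cb(struct task *t, struct wdata *d)
{
	printf("MDS commit path, status %d\n", t->status);
	return 0;
}

/* Generic completion: common bookkeeping first, then delegate. */
static int commit_done(struct task *t, struct wdata *d)
{
	/* the kernel checks nfs4_sequence_done() here before delegating */
	if (!d->done_cb)
		d->done_cb = default_done_cb; /* setup installs the default */
	return d->done_cb(t, d);
}

int main(void)
{
	struct task t = { .status = 0 };
	struct wdata d = { 0 };
	return commit_done(&t, &d);
}

Keeping the sequence bookkeeping in one place means the MDS and data-server commit paths cannot diverge on session handling.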
@@ -3452,7 +3520,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu | |||
3452 | resp_buf = buf; | 3520 | resp_buf = buf; |
3453 | buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); | 3521 | buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); |
3454 | } | 3522 | } |
3455 | ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); | 3523 | ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0); |
3456 | if (ret) | 3524 | if (ret) |
3457 | goto out_free; | 3525 | goto out_free; |
3458 | if (res.acl_len > args.acl_len) | 3526 | if (res.acl_len > args.acl_len) |
@@ -3527,7 +3595,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl | |||
3527 | if (i < 0) | 3595 | if (i < 0) |
3528 | return i; | 3596 | return i; |
3529 | nfs_inode_return_delegation(inode); | 3597 | nfs_inode_return_delegation(inode); |
3530 | ret = nfs4_call_sync(server, &msg, &arg, &res, 1); | 3598 | ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
3531 | 3599 | ||
3532 | /* | 3600 | /* |
3533 | * Free each page after tx, so the only ref left is | 3601 | * Free each page after tx, so the only ref left is |
@@ -3890,7 +3958,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock | |||
3890 | lsp = request->fl_u.nfs4_fl.owner; | 3958 | lsp = request->fl_u.nfs4_fl.owner; |
3891 | arg.lock_owner.id = lsp->ls_id.id; | 3959 | arg.lock_owner.id = lsp->ls_id.id; |
3892 | arg.lock_owner.s_dev = server->s_dev; | 3960 | arg.lock_owner.s_dev = server->s_dev; |
3893 | status = nfs4_call_sync(server, &msg, &arg, &res, 1); | 3961 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); |
3894 | switch (status) { | 3962 | switch (status) { |
3895 | case 0: | 3963 | case 0: |
3896 | request->fl_type = F_UNLCK; | 3964 | request->fl_type = F_UNLCK; |
@@ -4618,12 +4686,46 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, | |||
4618 | nfs_fattr_init(&fs_locations->fattr); | 4686 | nfs_fattr_init(&fs_locations->fattr); |
4619 | fs_locations->server = server; | 4687 | fs_locations->server = server; |
4620 | fs_locations->nlocations = 0; | 4688 | fs_locations->nlocations = 0; |
4621 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | 4689 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
4622 | nfs_fixup_referral_attributes(&fs_locations->fattr); | 4690 | nfs_fixup_referral_attributes(&fs_locations->fattr); |
4623 | dprintk("%s: returned status = %d\n", __func__, status); | 4691 | dprintk("%s: returned status = %d\n", __func__, status); |
4624 | return status; | 4692 | return status; |
4625 | } | 4693 | } |
4626 | 4694 | ||
4695 | static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) | ||
4696 | { | ||
4697 | int status; | ||
4698 | struct nfs4_secinfo_arg args = { | ||
4699 | .dir_fh = NFS_FH(dir), | ||
4700 | .name = name, | ||
4701 | }; | ||
4702 | struct nfs4_secinfo_res res = { | ||
4703 | .flavors = flavors, | ||
4704 | }; | ||
4705 | struct rpc_message msg = { | ||
4706 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO], | ||
4707 | .rpc_argp = &args, | ||
4708 | .rpc_resp = &res, | ||
4709 | }; | ||
4710 | |||
4711 | dprintk("NFS call secinfo %s\n", name->name); | ||
4712 | status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); | ||
4713 | dprintk("NFS reply secinfo: %d\n", status); | ||
4714 | return status; | ||
4715 | } | ||
4716 | |||
4717 | int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) | ||
4718 | { | ||
4719 | struct nfs4_exception exception = { }; | ||
4720 | int err; | ||
4721 | do { | ||
4722 | err = nfs4_handle_exception(NFS_SERVER(dir), | ||
4723 | _nfs4_proc_secinfo(dir, name, flavors), | ||
4724 | &exception); | ||
4725 | } while (exception.retry); | ||
4726 | return err; | ||
4727 | } | ||
4728 | |||
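The new secinfo pair above follows the usual NFSv4 proc convention: the bare _nfs4_proc_secinfo issues one COMPOUND, and the public wrapper loops through nfs4_handle_exception until the exception state stops asking for a retry. A self-contained sketch of that retry skeleton, with the RPC call and the error classification replaced by stand-ins:

#include <stdio.h>

struct exception { int retry; };

/* Stand-in for one RPC attempt; fails once, then succeeds. */
static int do_rpc_once(void)
{
	static int calls;
	return calls++ == 0 ? -11 /* pretend "retry me" */ : 0;
}

/* Stand-in for nfs4_handle_exception(): decides whether to retry. */
static int handle_exception(int err, struct exception *exc)
{
	exc->retry = (err == -11); /* e.g. NFS4ERR_DELAY-style errors */
	return exc->retry ? 0 : err;
}

int main(void)
{
	struct exception exc = { 0 };
	int err;

	do {
		err = handle_exception(do_rpc_once(), &exc);
	} while (exc.retry);
	printf("final status %d\n", err);
	return 0;
}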
4627 | #ifdef CONFIG_NFS_V4_1 | 4729 | #ifdef CONFIG_NFS_V4_1 |
4628 | /* | 4730 | /* |
4629 | * Check the exchange flags returned by the server for invalid flags, having | 4731 | * Check the exchange flags returned by the server for invalid flags, having |
@@ -5516,8 +5618,6 @@ static void nfs4_layoutget_release(void *calldata) | |||
5516 | struct nfs4_layoutget *lgp = calldata; | 5618 | struct nfs4_layoutget *lgp = calldata; |
5517 | 5619 | ||
5518 | dprintk("--> %s\n", __func__); | 5620 | dprintk("--> %s\n", __func__); |
5519 | if (lgp->res.layout.buf != NULL) | ||
5520 | free_page((unsigned long) lgp->res.layout.buf); | ||
5521 | put_nfs_open_context(lgp->args.ctx); | 5621 | put_nfs_open_context(lgp->args.ctx); |
5522 | kfree(calldata); | 5622 | kfree(calldata); |
5523 | dprintk("<-- %s\n", __func__); | 5623 | dprintk("<-- %s\n", __func__); |
@@ -5549,12 +5649,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) | |||
5549 | 5649 | ||
5550 | dprintk("--> %s\n", __func__); | 5650 | dprintk("--> %s\n", __func__); |
5551 | 5651 | ||
5552 | lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); | 5652 | lgp->res.layoutp = &lgp->args.layout; |
5553 | if (lgp->res.layout.buf == NULL) { | ||
5554 | nfs4_layoutget_release(lgp); | ||
5555 | return -ENOMEM; | ||
5556 | } | ||
5557 | |||
5558 | lgp->res.seq_res.sr_slot = NULL; | 5653 | lgp->res.seq_res.sr_slot = NULL; |
5559 | task = rpc_run_task(&task_setup_data); | 5654 | task = rpc_run_task(&task_setup_data); |
5560 | if (IS_ERR(task)) | 5655 | if (IS_ERR(task)) |
@@ -5586,7 +5681,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | |||
5586 | int status; | 5681 | int status; |
5587 | 5682 | ||
5588 | dprintk("--> %s\n", __func__); | 5683 | dprintk("--> %s\n", __func__); |
5589 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | 5684 | status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); |
5590 | dprintk("<-- %s status=%d\n", __func__, status); | 5685 | dprintk("<-- %s status=%d\n", __func__, status); |
5591 | 5686 | ||
5592 | return status; | 5687 | return status; |
@@ -5606,6 +5701,100 @@ int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | |||
5606 | } | 5701 | } |
5607 | EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); | 5702 | EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); |
5608 | 5703 | ||
5704 | static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata) | ||
5705 | { | ||
5706 | struct nfs4_layoutcommit_data *data = calldata; | ||
5707 | struct nfs_server *server = NFS_SERVER(data->args.inode); | ||
5708 | |||
5709 | if (nfs4_setup_sequence(server, &data->args.seq_args, | ||
5710 | &data->res.seq_res, 1, task)) | ||
5711 | return; | ||
5712 | rpc_call_start(task); | ||
5713 | } | ||
5714 | |||
5715 | static void | ||
5716 | nfs4_layoutcommit_done(struct rpc_task *task, void *calldata) | ||
5717 | { | ||
5718 | struct nfs4_layoutcommit_data *data = calldata; | ||
5719 | struct nfs_server *server = NFS_SERVER(data->args.inode); | ||
5720 | |||
5721 | if (!nfs4_sequence_done(task, &data->res.seq_res)) | ||
5722 | return; | ||
5723 | |||
5724 | switch (task->tk_status) { /* Just ignore these failures */ | ||
5725 | case -NFS4ERR_DELEG_REVOKED: /* layout was recalled */ | ||
5726 | case -NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */ | ||
5727 | case -NFS4ERR_BADLAYOUT: /* no layout */ | ||
5728 | case -NFS4ERR_GRACE: /* loca_reclaim always false */ | ||
5729 | task->tk_status = 0; | ||
5730 | } | ||
5731 | |||
5732 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | ||
5733 | nfs_restart_rpc(task, server->nfs_client); | ||
5734 | return; | ||
5735 | } | ||
5736 | |||
5737 | if (task->tk_status == 0) | ||
5738 | nfs_post_op_update_inode_force_wcc(data->args.inode, | ||
5739 | data->res.fattr); | ||
5740 | } | ||
5741 | |||
5742 | static void nfs4_layoutcommit_release(void *calldata) | ||
5743 | { | ||
5744 | struct nfs4_layoutcommit_data *data = calldata; | ||
5745 | |||
5746 | /* Matched by references in pnfs_set_layoutcommit */ | ||
5747 | put_lseg(data->lseg); | ||
5748 | put_rpccred(data->cred); | ||
5749 | kfree(data); | ||
5750 | } | ||
5751 | |||
5752 | static const struct rpc_call_ops nfs4_layoutcommit_ops = { | ||
5753 | .rpc_call_prepare = nfs4_layoutcommit_prepare, | ||
5754 | .rpc_call_done = nfs4_layoutcommit_done, | ||
5755 | .rpc_release = nfs4_layoutcommit_release, | ||
5756 | }; | ||
5757 | |||
5758 | int | ||
5759 | nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync) | ||
5760 | { | ||
5761 | struct rpc_message msg = { | ||
5762 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT], | ||
5763 | .rpc_argp = &data->args, | ||
5764 | .rpc_resp = &data->res, | ||
5765 | .rpc_cred = data->cred, | ||
5766 | }; | ||
5767 | struct rpc_task_setup task_setup_data = { | ||
5768 | .task = &data->task, | ||
5769 | .rpc_client = NFS_CLIENT(data->args.inode), | ||
5770 | .rpc_message = &msg, | ||
5771 | .callback_ops = &nfs4_layoutcommit_ops, | ||
5772 | .callback_data = data, | ||
5773 | .flags = RPC_TASK_ASYNC, | ||
5774 | }; | ||
5775 | struct rpc_task *task; | ||
5776 | int status = 0; | ||
5777 | |||
5778 | dprintk("NFS: %4d initiating layoutcommit call. sync %d " | ||
5779 | "lbw: %llu inode %lu\n", | ||
5780 | data->task.tk_pid, sync, | ||
5781 | data->args.lastbytewritten, | ||
5782 | data->args.inode->i_ino); | ||
5783 | |||
5784 | task = rpc_run_task(&task_setup_data); | ||
5785 | if (IS_ERR(task)) | ||
5786 | return PTR_ERR(task); | ||
5787 | if (!sync) | ||
5788 | goto out; | ||
5789 | status = nfs4_wait_for_completion_rpc_task(task); | ||
5790 | if (status != 0) | ||
5791 | goto out; | ||
5792 | status = task->tk_status; | ||
5793 | out: | ||
5794 | dprintk("%s: status %d\n", __func__, status); | ||
5795 | rpc_put_task(task); | ||
5796 | return status; | ||
5797 | } | ||
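nfs4_proc_layoutcommit always launches the task asynchronously (RPC_TASK_ASYNC) and only blocks for the result when the caller passes sync; otherwise the release callback cleans up later. A rough userspace analogue of that run-then-maybe-wait shape, using a pthread as the stand-in for the RPC task:

#include <pthread.h>
#include <stdio.h>

struct rpc_task { pthread_t thr; int status; };

static void *task_body(void *arg)
{
	struct rpc_task *t = arg;
	t->status = 0;            /* pretend the COMPOUND succeeded */
	return NULL;
}

/* Run asynchronously; block for the result only when sync is set. */
static int run_task(struct rpc_task *t, int sync)
{
	if (pthread_create(&t->thr, NULL, task_body, t))
		return -1;
	if (!sync) {
		pthread_detach(t->thr); /* release path cleans up later */
		return 0;
	}
	pthread_join(t->thr, NULL);
	return t->status;         /* report the task's own status */
}

int main(void)
{
	struct rpc_task t;
	printf("sync status %d\n", run_task(&t, 1));
	return 0;
}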
5609 | #endif /* CONFIG_NFS_V4_1 */ | 5798 | #endif /* CONFIG_NFS_V4_1 */ |
5610 | 5799 | ||
5611 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { | 5800 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { |
@@ -5741,6 +5930,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
5741 | .close_context = nfs4_close_context, | 5930 | .close_context = nfs4_close_context, |
5742 | .open_context = nfs4_atomic_open, | 5931 | .open_context = nfs4_atomic_open, |
5743 | .init_client = nfs4_init_client, | 5932 | .init_client = nfs4_init_client, |
5933 | .secinfo = nfs4_proc_secinfo, | ||
5744 | }; | 5934 | }; |
5745 | 5935 | ||
5746 | static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { | 5936 | static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 0cf560f77884..dddfb5795d7b 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <linux/kdev_t.h> | 46 | #include <linux/kdev_t.h> |
47 | #include <linux/sunrpc/clnt.h> | 47 | #include <linux/sunrpc/clnt.h> |
48 | #include <linux/sunrpc/msg_prot.h> | 48 | #include <linux/sunrpc/msg_prot.h> |
49 | #include <linux/sunrpc/gss_api.h> | ||
49 | #include <linux/nfs.h> | 50 | #include <linux/nfs.h> |
50 | #include <linux/nfs4.h> | 51 | #include <linux/nfs4.h> |
51 | #include <linux/nfs_fs.h> | 52 | #include <linux/nfs_fs.h> |
@@ -112,7 +113,7 @@ static int nfs4_stat_to_errno(int); | |||
112 | #define encode_restorefh_maxsz (op_encode_hdr_maxsz) | 113 | #define encode_restorefh_maxsz (op_encode_hdr_maxsz) |
113 | #define decode_restorefh_maxsz (op_decode_hdr_maxsz) | 114 | #define decode_restorefh_maxsz (op_decode_hdr_maxsz) |
114 | #define encode_fsinfo_maxsz (encode_getattr_maxsz) | 115 | #define encode_fsinfo_maxsz (encode_getattr_maxsz) |
115 | #define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) | 116 | #define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 15) |
116 | #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) | 117 | #define encode_renew_maxsz (op_encode_hdr_maxsz + 3) |
117 | #define decode_renew_maxsz (op_decode_hdr_maxsz) | 118 | #define decode_renew_maxsz (op_decode_hdr_maxsz) |
118 | #define encode_setclientid_maxsz \ | 119 | #define encode_setclientid_maxsz \ |
@@ -253,6 +254,8 @@ static int nfs4_stat_to_errno(int); | |||
253 | (encode_getattr_maxsz) | 254 | (encode_getattr_maxsz) |
254 | #define decode_fs_locations_maxsz \ | 255 | #define decode_fs_locations_maxsz \ |
255 | (0) | 256 | (0) |
257 | #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) | ||
258 | #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN))) | ||
256 | 259 | ||
257 | #if defined(CONFIG_NFS_V4_1) | 260 | #if defined(CONFIG_NFS_V4_1) |
258 | #define NFS4_MAX_MACHINE_NAME_LEN (64) | 261 | #define NFS4_MAX_MACHINE_NAME_LEN (64) |
@@ -324,6 +327,18 @@ static int nfs4_stat_to_errno(int); | |||
324 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ | 327 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ |
325 | decode_stateid_maxsz + \ | 328 | decode_stateid_maxsz + \ |
326 | XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) | 329 | XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) |
330 | #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \ | ||
331 | 2 /* offset */ + \ | ||
332 | 2 /* length */ + \ | ||
333 | 1 /* reclaim */ + \ | ||
334 | encode_stateid_maxsz + \ | ||
335 | 1 /* new offset (true) */ + \ | ||
336 | 2 /* last byte written */ + \ | ||
337 | 1 /* nt_timechanged (false) */ + \ | ||
338 | 1 /* layoutupdate4 layout type */ + \ | ||
339 | 1 /* NULL filelayout layoutupdate4 payload */) | ||
340 | #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) | ||
341 | |||
327 | #else /* CONFIG_NFS_V4_1 */ | 342 | #else /* CONFIG_NFS_V4_1 */ |
328 | #define encode_sequence_maxsz 0 | 343 | #define encode_sequence_maxsz 0 |
329 | #define decode_sequence_maxsz 0 | 344 | #define decode_sequence_maxsz 0 |
@@ -676,6 +691,14 @@ static int nfs4_stat_to_errno(int); | |||
676 | decode_putfh_maxsz + \ | 691 | decode_putfh_maxsz + \ |
677 | decode_lookup_maxsz + \ | 692 | decode_lookup_maxsz + \ |
678 | decode_fs_locations_maxsz) | 693 | decode_fs_locations_maxsz) |
694 | #define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \ | ||
695 | encode_sequence_maxsz + \ | ||
696 | encode_putfh_maxsz + \ | ||
697 | encode_secinfo_maxsz) | ||
698 | #define NFS4_dec_secinfo_sz (compound_decode_hdr_maxsz + \ | ||
699 | decode_sequence_maxsz + \ | ||
700 | decode_putfh_maxsz + \ | ||
701 | decode_secinfo_maxsz) | ||
679 | #if defined(CONFIG_NFS_V4_1) | 702 | #if defined(CONFIG_NFS_V4_1) |
680 | #define NFS4_enc_exchange_id_sz \ | 703 | #define NFS4_enc_exchange_id_sz \ |
681 | (compound_encode_hdr_maxsz + \ | 704 | (compound_encode_hdr_maxsz + \ |
@@ -727,6 +750,17 @@ static int nfs4_stat_to_errno(int); | |||
727 | decode_sequence_maxsz + \ | 750 | decode_sequence_maxsz + \ |
728 | decode_putfh_maxsz + \ | 751 | decode_putfh_maxsz + \ |
729 | decode_layoutget_maxsz) | 752 | decode_layoutget_maxsz) |
753 | #define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \ | ||
754 | encode_sequence_maxsz +\ | ||
755 | encode_putfh_maxsz + \ | ||
756 | encode_layoutcommit_maxsz + \ | ||
757 | encode_getattr_maxsz) | ||
758 | #define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \ | ||
759 | decode_sequence_maxsz + \ | ||
760 | decode_putfh_maxsz + \ | ||
761 | decode_layoutcommit_maxsz + \ | ||
762 | decode_getattr_maxsz) | ||
763 | |||
730 | 764 | ||
731 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | 765 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + |
732 | compound_encode_hdr_maxsz + | 766 | compound_encode_hdr_maxsz + |
@@ -1620,6 +1654,18 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state | |||
1620 | hdr->replen += decode_delegreturn_maxsz; | 1654 | hdr->replen += decode_delegreturn_maxsz; |
1621 | } | 1655 | } |
1622 | 1656 | ||
1657 | static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr) | ||
1658 | { | ||
1659 | int len = name->len; | ||
1660 | __be32 *p; | ||
1661 | |||
1662 | p = reserve_space(xdr, 8 + len); | ||
1663 | *p++ = cpu_to_be32(OP_SECINFO); | ||
1664 | xdr_encode_opaque(p, name->name, len); | ||
1665 | hdr->nops++; | ||
1666 | hdr->replen += decode_secinfo_maxsz; | ||
1667 | } | ||
1668 | |||
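encode_secinfo reserves 8 + len bytes: a 4-byte opcode, a 4-byte opaque length, then the name itself, which XDR pads out to a 4-byte boundary (the kernel's reserve_space rounds the request up to quad alignment). A small sketch of the size arithmetic, assuming nothing beyond the XDR padding rule:

#include <stdio.h>

/* XDR pads opaque data to a 4-byte boundary. */
static unsigned xdr_quadlen_bytes(unsigned len)
{
	return (len + 3) & ~3u;
}

/* Wire size of a SECINFO op: 4-byte opcode + 4-byte length + padded name. */
static unsigned secinfo_op_size(unsigned name_len)
{
	return 4 + 4 + xdr_quadlen_bytes(name_len);
}

int main(void)
{
	/* a 5-byte name pads to 8, so the op occupies 16 bytes */
	printf("%u\n", secinfo_op_size(5));
	return 0;
}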
1623 | #if defined(CONFIG_NFS_V4_1) | 1669 | #if defined(CONFIG_NFS_V4_1) |
1624 | /* NFSv4.1 operations */ | 1670 | /* NFSv4.1 operations */ |
1625 | static void encode_exchange_id(struct xdr_stream *xdr, | 1671 | static void encode_exchange_id(struct xdr_stream *xdr, |
@@ -1816,6 +1862,34 @@ encode_layoutget(struct xdr_stream *xdr, | |||
1816 | hdr->nops++; | 1862 | hdr->nops++; |
1817 | hdr->replen += decode_layoutget_maxsz; | 1863 | hdr->replen += decode_layoutget_maxsz; |
1818 | } | 1864 | } |
1865 | |||
1866 | static int | ||
1867 | encode_layoutcommit(struct xdr_stream *xdr, | ||
1868 | const struct nfs4_layoutcommit_args *args, | ||
1869 | struct compound_hdr *hdr) | ||
1870 | { | ||
1871 | __be32 *p; | ||
1872 | |||
1873 | dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, | ||
1874 | NFS_SERVER(args->inode)->pnfs_curr_ld->id); | ||
1875 | |||
1876 | p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE); | ||
1877 | *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); | ||
1878 | /* Only whole file layouts */ | ||
1879 | p = xdr_encode_hyper(p, 0); /* offset */ | ||
1880 | p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */ | ||
1881 | *p++ = cpu_to_be32(0); /* reclaim */ | ||
1882 | p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE); | ||
1883 | *p++ = cpu_to_be32(1); /* newoffset = TRUE */ | ||
1884 | p = xdr_encode_hyper(p, args->lastbytewritten); | ||
1885 | *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ | ||
1886 | *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ | ||
1887 | *p++ = cpu_to_be32(0); /* no file layout payload */ | ||
1888 | |||
1889 | hdr->nops++; | ||
1890 | hdr->replen += decode_layoutcommit_maxsz; | ||
1891 | return 0; | ||
1892 | } | ||
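The 48 + NFS4_STATEID_SIZE reservation in encode_layoutcommit is exactly the sum of the fixed-width fields it emits. The sketch below redoes that arithmetic; the only assumed constant is NFS4_STATEID_SIZE = 16, matching the kernel definition:

#include <assert.h>
#include <stdio.h>

#define NFS4_STATEID_SIZE 16 /* matches the kernel constant */

int main(void)
{
	/* Fields encoded by encode_layoutcommit(), in order: */
	unsigned op        = 4; /* OP_LAYOUTCOMMIT */
	unsigned offset    = 8; /* hyper, always 0 (whole file) */
	unsigned length    = 8; /* hyper, NFS4_MAX_UINT64 */
	unsigned reclaim   = 4; /* bool, 0 */
	unsigned newoffset = 4; /* bool, 1 */
	unsigned lastbyte  = 8; /* hyper, args->lastbytewritten */
	unsigned timechg   = 4; /* bool, 0 */
	unsigned lotype    = 4; /* layoutupdate4 layout type */
	unsigned payload   = 4; /* zero-length file-layout payload */

	unsigned fixed = op + offset + length + reclaim + newoffset +
			 lastbyte + timechg + lotype + payload;
	assert(fixed == 48); /* hence reserve_space(xdr, 48 + NFS4_STATEID_SIZE) */
	printf("%u\n", fixed + NFS4_STATEID_SIZE);
	return 0;
}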
1819 | #endif /* CONFIG_NFS_V4_1 */ | 1893 | #endif /* CONFIG_NFS_V4_1 */ |
1820 | 1894 | ||
1821 | /* | 1895 | /* |
@@ -2294,7 +2368,8 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr, | |||
2294 | encode_sequence(xdr, &args->seq_args, &hdr); | 2368 | encode_sequence(xdr, &args->seq_args, &hdr); |
2295 | encode_putfh(xdr, args->fh, &hdr); | 2369 | encode_putfh(xdr, args->fh, &hdr); |
2296 | encode_commit(xdr, args, &hdr); | 2370 | encode_commit(xdr, args, &hdr); |
2297 | encode_getfattr(xdr, args->bitmask, &hdr); | 2371 | if (args->bitmask) |
2372 | encode_getfattr(xdr, args->bitmask, &hdr); | ||
2298 | encode_nops(&hdr); | 2373 | encode_nops(&hdr); |
2299 | } | 2374 | } |
2300 | 2375 | ||
@@ -2465,6 +2540,24 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, | |||
2465 | encode_nops(&hdr); | 2540 | encode_nops(&hdr); |
2466 | } | 2541 | } |
2467 | 2542 | ||
2543 | /* | ||
2544 | * Encode SECINFO request | ||
2545 | */ | ||
2546 | static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req, | ||
2547 | struct xdr_stream *xdr, | ||
2548 | struct nfs4_secinfo_arg *args) | ||
2549 | { | ||
2550 | struct compound_hdr hdr = { | ||
2551 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2552 | }; | ||
2553 | |||
2554 | encode_compound_hdr(xdr, req, &hdr); | ||
2555 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2556 | encode_putfh(xdr, args->dir_fh, &hdr); | ||
2557 | encode_secinfo(xdr, args->name, &hdr); | ||
2558 | encode_nops(&hdr); | ||
2559 | } | ||
2560 | |||
2468 | #if defined(CONFIG_NFS_V4_1) | 2561 | #if defined(CONFIG_NFS_V4_1) |
2469 | /* | 2562 | /* |
2470 | * EXCHANGE_ID request | 2563 | * EXCHANGE_ID request |
@@ -2604,8 +2697,32 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, | |||
2604 | encode_sequence(xdr, &args->seq_args, &hdr); | 2697 | encode_sequence(xdr, &args->seq_args, &hdr); |
2605 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); | 2698 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); |
2606 | encode_layoutget(xdr, args, &hdr); | 2699 | encode_layoutget(xdr, args, &hdr); |
2700 | |||
2701 | xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, | ||
2702 | args->layout.pages, 0, args->layout.pglen); | ||
2703 | |||
2607 | encode_nops(&hdr); | 2704 | encode_nops(&hdr); |
2608 | } | 2705 | } |
2706 | |||
2707 | /* | ||
2708 | * Encode LAYOUTCOMMIT request | ||
2709 | */ | ||
2710 | static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, | ||
2711 | struct xdr_stream *xdr, | ||
2712 | struct nfs4_layoutcommit_args *args) | ||
2713 | { | ||
2714 | struct compound_hdr hdr = { | ||
2715 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2716 | }; | ||
2717 | |||
2718 | encode_compound_hdr(xdr, req, &hdr); | ||
2719 | encode_sequence(xdr, &args->seq_args, &hdr); | ||
2720 | encode_putfh(xdr, NFS_FH(args->inode), &hdr); | ||
2721 | encode_layoutcommit(xdr, args, &hdr); | ||
2722 | encode_getfattr(xdr, args->bitmask, &hdr); | ||
2723 | encode_nops(&hdr); | ||
2724 | return 0; | ||
2725 | } | ||
2609 | #endif /* CONFIG_NFS_V4_1 */ | 2726 | #endif /* CONFIG_NFS_V4_1 */ |
2610 | 2727 | ||
2611 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) | 2728 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) |
@@ -2925,6 +3042,7 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap) | |||
2925 | if (unlikely(!p)) | 3042 | if (unlikely(!p)) |
2926 | goto out_overflow; | 3043 | goto out_overflow; |
2927 | bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; | 3044 | bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; |
3045 | return -be32_to_cpup(p); | ||
2928 | } | 3046 | } |
2929 | return 0; | 3047 | return 0; |
2930 | out_overflow: | 3048 | out_overflow: |
@@ -3912,6 +4030,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, | |||
3912 | fattr->valid |= status; | 4030 | fattr->valid |= status; |
3913 | 4031 | ||
3914 | status = decode_attr_error(xdr, bitmap); | 4032 | status = decode_attr_error(xdr, bitmap); |
4033 | if (status == -NFS4ERR_WRONGSEC) { | ||
4034 | nfs_fixup_secinfo_attributes(fattr, fh); | ||
4035 | status = 0; | ||
4036 | } | ||
3915 | if (status < 0) | 4037 | if (status < 0) |
3916 | goto xdr_error; | 4038 | goto xdr_error; |
3917 | 4039 | ||
@@ -4680,6 +4802,73 @@ static int decode_delegreturn(struct xdr_stream *xdr) | |||
4680 | return decode_op_hdr(xdr, OP_DELEGRETURN); | 4802 | return decode_op_hdr(xdr, OP_DELEGRETURN); |
4681 | } | 4803 | } |
4682 | 4804 | ||
4805 | static int decode_secinfo_gss(struct xdr_stream *xdr, struct nfs4_secinfo_flavor *flavor) | ||
4806 | { | ||
4807 | __be32 *p; | ||
4808 | |||
4809 | p = xdr_inline_decode(xdr, 4); | ||
4810 | if (unlikely(!p)) | ||
4811 | goto out_overflow; | ||
4812 | flavor->gss.sec_oid4.len = be32_to_cpup(p); | ||
4813 | if (flavor->gss.sec_oid4.len > GSS_OID_MAX_LEN) | ||
4814 | goto out_err; | ||
4815 | |||
4816 | p = xdr_inline_decode(xdr, flavor->gss.sec_oid4.len); | ||
4817 | if (unlikely(!p)) | ||
4818 | goto out_overflow; | ||
4819 | memcpy(flavor->gss.sec_oid4.data, p, flavor->gss.sec_oid4.len); | ||
4820 | |||
4821 | p = xdr_inline_decode(xdr, 8); | ||
4822 | if (unlikely(!p)) | ||
4823 | goto out_overflow; | ||
4824 | flavor->gss.qop4 = be32_to_cpup(p++); | ||
4825 | flavor->gss.service = be32_to_cpup(p); | ||
4826 | |||
4827 | return 0; | ||
4828 | |||
4829 | out_overflow: | ||
4830 | print_overflow_msg(__func__, xdr); | ||
4831 | return -EIO; | ||
4832 | out_err: | ||
4833 | return -EINVAL; | ||
4834 | } | ||
4835 | |||
4836 | static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) | ||
4837 | { | ||
4838 | struct nfs4_secinfo_flavor *sec_flavor; | ||
4839 | int status; | ||
4840 | __be32 *p; | ||
4841 | int i; | ||
4842 | |||
4843 | status = decode_op_hdr(xdr, OP_SECINFO); | ||
4844 | p = xdr_inline_decode(xdr, 4); | ||
4845 | if (unlikely(!p)) | ||
4846 | goto out_overflow; | ||
4847 | res->flavors->num_flavors = be32_to_cpup(p); | ||
4848 | |||
4849 | for (i = 0; i < res->flavors->num_flavors; i++) { | ||
4850 | sec_flavor = &res->flavors->flavors[i]; | ||
4851 | if ((char *)&sec_flavor[1] - (char *)res > PAGE_SIZE) | ||
4852 | break; | ||
4853 | |||
4854 | p = xdr_inline_decode(xdr, 4); | ||
4855 | if (unlikely(!p)) | ||
4856 | goto out_overflow; | ||
4857 | sec_flavor->flavor = be32_to_cpup(p); | ||
4858 | |||
4859 | if (sec_flavor->flavor == RPC_AUTH_GSS) { | ||
4860 | if (decode_secinfo_gss(xdr, sec_flavor)) | ||
4861 | break; | ||
4862 | } | ||
4863 | } | ||
4864 | |||
4865 | return 0; | ||
4866 | |||
4867 | out_overflow: | ||
4868 | print_overflow_msg(__func__, xdr); | ||
4869 | return -EIO; | ||
4870 | } | ||
4871 | |||
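decode_secinfo trusts the server's num_flavors only as far as the reply buffer allows: the loop breaks as soon as the next flavor entry would cross PAGE_SIZE. A standalone sketch of that bounds check, with simplified stand-in structs (the real nfs4_secinfo_flavor layout differs):

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096ul

struct flavor { unsigned flavor; unsigned pad[8]; };   /* 36 bytes */
struct flavors { unsigned num_flavors; struct flavor flavors[]; };

/* Honor the server's count only while entries still fit the buffer. */
static unsigned walk_flavors(const struct flavors *f, unsigned long buf_size)
{
	unsigned i, used = 0;

	for (i = 0; i < f->num_flavors; i++) {
		/* kernel: (char *)&sec_flavor[1] - (char *)res > PAGE_SIZE */
		if (sizeof(*f) + (i + 1ul) * sizeof(struct flavor) > buf_size)
			break;
		used++;
	}
	return used;
}

int main(void)
{
	struct flavors *f = calloc(1, PAGE_SIZE);

	if (!f)
		return 1;
	f->num_flavors = 1u << 30;        /* hostile count off the wire */
	printf("accepted %u entries\n", walk_flavors(f, PAGE_SIZE));
	free(f);
	return 0;
}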
4683 | #if defined(CONFIG_NFS_V4_1) | 4872 | #if defined(CONFIG_NFS_V4_1) |
4684 | static int decode_exchange_id(struct xdr_stream *xdr, | 4873 | static int decode_exchange_id(struct xdr_stream *xdr, |
4685 | struct nfs41_exchange_id_res *res) | 4874 | struct nfs41_exchange_id_res *res) |
@@ -4950,6 +5139,9 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
4950 | __be32 *p; | 5139 | __be32 *p; |
4951 | int status; | 5140 | int status; |
4952 | u32 layout_count; | 5141 | u32 layout_count; |
5142 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | ||
5143 | struct kvec *iov = rcvbuf->head; | ||
5144 | u32 hdrlen, recvd; | ||
4953 | 5145 | ||
4954 | status = decode_op_hdr(xdr, OP_LAYOUTGET); | 5146 | status = decode_op_hdr(xdr, OP_LAYOUTGET); |
4955 | if (status) | 5147 | if (status) |
@@ -4966,17 +5158,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
4966 | return -EINVAL; | 5158 | return -EINVAL; |
4967 | } | 5159 | } |
4968 | 5160 | ||
4969 | p = xdr_inline_decode(xdr, 24); | 5161 | p = xdr_inline_decode(xdr, 28); |
4970 | if (unlikely(!p)) | 5162 | if (unlikely(!p)) |
4971 | goto out_overflow; | 5163 | goto out_overflow; |
4972 | p = xdr_decode_hyper(p, &res->range.offset); | 5164 | p = xdr_decode_hyper(p, &res->range.offset); |
4973 | p = xdr_decode_hyper(p, &res->range.length); | 5165 | p = xdr_decode_hyper(p, &res->range.length); |
4974 | res->range.iomode = be32_to_cpup(p++); | 5166 | res->range.iomode = be32_to_cpup(p++); |
4975 | res->type = be32_to_cpup(p++); | 5167 | res->type = be32_to_cpup(p++); |
4976 | 5168 | res->layoutp->len = be32_to_cpup(p); | |
4977 | status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p); | ||
4978 | if (unlikely(status)) | ||
4979 | return status; | ||
4980 | 5169 | ||
4981 | dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", | 5170 | dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", |
4982 | __func__, | 5171 | __func__, |
@@ -4984,12 +5173,18 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, | |||
4984 | (unsigned long)res->range.length, | 5173 | (unsigned long)res->range.length, |
4985 | res->range.iomode, | 5174 | res->range.iomode, |
4986 | res->type, | 5175 | res->type, |
4987 | res->layout.len); | 5176 | res->layoutp->len); |
4988 | 5177 | ||
4989 | /* nfs4_proc_layoutget allocated a single page */ | 5178 | hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base; |
4990 | if (res->layout.len > PAGE_SIZE) | 5179 | recvd = req->rq_rcv_buf.len - hdrlen; |
4991 | return -ENOMEM; | 5180 | if (res->layoutp->len > recvd) { |
4992 | memcpy(res->layout.buf, p, res->layout.len); | 5181 | dprintk("NFS: server cheating in layoutget reply: " |
5182 | "layout len %u > recvd %u\n", | ||
5183 | res->layoutp->len, recvd); | ||
5184 | return -EINVAL; | ||
5185 | } | ||
5186 | |||
5187 | xdr_read_pages(xdr, res->layoutp->len); | ||
4993 | 5188 | ||
4994 | if (layout_count > 1) { | 5189 | if (layout_count > 1) { |
4995 | /* We only handle a length one array at the moment. Any | 5190 | /* We only handle a length one array at the moment. Any |
@@ -5006,6 +5201,35 @@ out_overflow: | |||
5006 | print_overflow_msg(__func__, xdr); | 5201 | print_overflow_msg(__func__, xdr); |
5007 | return -EIO; | 5202 | return -EIO; |
5008 | } | 5203 | } |
5204 | |||
5205 | static int decode_layoutcommit(struct xdr_stream *xdr, | ||
5206 | struct rpc_rqst *req, | ||
5207 | struct nfs4_layoutcommit_res *res) | ||
5208 | { | ||
5209 | __be32 *p; | ||
5210 | __u32 sizechanged; | ||
5211 | int status; | ||
5212 | |||
5213 | status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT); | ||
5214 | if (status) | ||
5215 | return status; | ||
5216 | |||
5217 | p = xdr_inline_decode(xdr, 4); | ||
5218 | if (unlikely(!p)) | ||
5219 | goto out_overflow; | ||
5220 | sizechanged = be32_to_cpup(p); | ||
5221 | |||
5222 | if (sizechanged) { | ||
5223 | /* throw away new size */ | ||
5224 | p = xdr_inline_decode(xdr, 8); | ||
5225 | if (unlikely(!p)) | ||
5226 | goto out_overflow; | ||
5227 | } | ||
5228 | return 0; | ||
5229 | out_overflow: | ||
5230 | print_overflow_msg(__func__, xdr); | ||
5231 | return -EIO; | ||
5232 | } | ||
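decode_layoutcommit shows the XDR optional-field idiom: read a 4-byte boolean, and only consume the trailing 8-byte new size when it is set (here the size is simply discarded). A hedged, self-contained decoder for the same newsize4 shape, assuming big-endian wire order as XDR specifies:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

/* Decode "newsize4": a 4-byte boolean, then an 8-byte size only if set. */
static int decode_newsize(const uint8_t *p, size_t len, uint64_t *size_out)
{
	uint32_t changed, hi, lo;

	if (len < 4)
		return -1;              /* would overflow the buffer */
	memcpy(&changed, p, 4);
	if (!ntohl(changed))
		return 0;               /* no size follows */
	if (len < 12)
		return -1;
	memcpy(&hi, p + 4, 4);
	memcpy(&lo, p + 8, 4);
	*size_out = ((uint64_t)ntohl(hi) << 32) | ntohl(lo);
	return 1;
}

int main(void)
{
	uint8_t wire[12] = { 0, 0, 0, 1,  0, 0, 0, 0,  0, 0, 16, 0 };
	uint64_t size = 0;
	printf("rc=%d size=%llu\n", decode_newsize(wire, sizeof(wire), &size),
	       (unsigned long long)size);
	return 0;
}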
5009 | #endif /* CONFIG_NFS_V4_1 */ | 5233 | #endif /* CONFIG_NFS_V4_1 */ |
5010 | 5234 | ||
5011 | /* | 5235 | /* |
@@ -5723,8 +5947,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr, | |||
5723 | status = decode_commit(xdr, res); | 5947 | status = decode_commit(xdr, res); |
5724 | if (status) | 5948 | if (status) |
5725 | goto out; | 5949 | goto out; |
5726 | decode_getfattr(xdr, res->fattr, res->server, | 5950 | if (res->fattr) |
5727 | !RPC_IS_ASYNC(rqstp->rq_task)); | 5951 | decode_getfattr(xdr, res->fattr, res->server, |
5952 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
5728 | out: | 5953 | out: |
5729 | return status; | 5954 | return status; |
5730 | } | 5955 | } |
@@ -5919,6 +6144,32 @@ out: | |||
5919 | return status; | 6144 | return status; |
5920 | } | 6145 | } |
5921 | 6146 | ||
6147 | /* | ||
6148 | * Decode SECINFO response | ||
6149 | */ | ||
6150 | static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp, | ||
6151 | struct xdr_stream *xdr, | ||
6152 | struct nfs4_secinfo_res *res) | ||
6153 | { | ||
6154 | struct compound_hdr hdr; | ||
6155 | int status; | ||
6156 | |||
6157 | status = decode_compound_hdr(xdr, &hdr); | ||
6158 | if (status) | ||
6159 | goto out; | ||
6160 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6161 | if (status) | ||
6162 | goto out; | ||
6163 | status = decode_putfh(xdr); | ||
6164 | if (status) | ||
6165 | goto out; | ||
6166 | status = decode_secinfo(xdr, res); | ||
6167 | if (status) | ||
6168 | goto out; | ||
6169 | out: | ||
6170 | return status; | ||
6171 | } | ||
6172 | |||
5922 | #if defined(CONFIG_NFS_V4_1) | 6173 | #if defined(CONFIG_NFS_V4_1) |
5923 | /* | 6174 | /* |
5924 | * Decode EXCHANGE_ID response | 6175 | * Decode EXCHANGE_ID response |
@@ -6066,6 +6317,34 @@ static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, | |||
6066 | out: | 6317 | out: |
6067 | return status; | 6318 | return status; |
6068 | } | 6319 | } |
6320 | |||
6321 | /* | ||
6322 | * Decode LAYOUTCOMMIT response | ||
6323 | */ | ||
6324 | static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, | ||
6325 | struct xdr_stream *xdr, | ||
6326 | struct nfs4_layoutcommit_res *res) | ||
6327 | { | ||
6328 | struct compound_hdr hdr; | ||
6329 | int status; | ||
6330 | |||
6331 | status = decode_compound_hdr(xdr, &hdr); | ||
6332 | if (status) | ||
6333 | goto out; | ||
6334 | status = decode_sequence(xdr, &res->seq_res, rqstp); | ||
6335 | if (status) | ||
6336 | goto out; | ||
6337 | status = decode_putfh(xdr); | ||
6338 | if (status) | ||
6339 | goto out; | ||
6340 | status = decode_layoutcommit(xdr, rqstp, res); | ||
6341 | if (status) | ||
6342 | goto out; | ||
6343 | decode_getfattr(xdr, res->fattr, res->server, | ||
6344 | !RPC_IS_ASYNC(rqstp->rq_task)); | ||
6345 | out: | ||
6346 | return status; | ||
6347 | } | ||
6069 | #endif /* CONFIG_NFS_V4_1 */ | 6348 | #endif /* CONFIG_NFS_V4_1 */ |
6070 | 6349 | ||
6071 | /** | 6350 | /** |
@@ -6180,10 +6459,6 @@ static struct { | |||
6180 | { NFS4ERR_SYMLINK, -ELOOP }, | 6459 | { NFS4ERR_SYMLINK, -ELOOP }, |
6181 | { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, | 6460 | { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, |
6182 | { NFS4ERR_DEADLOCK, -EDEADLK }, | 6461 | { NFS4ERR_DEADLOCK, -EDEADLK }, |
6183 | { NFS4ERR_WRONGSEC, -EPERM }, /* FIXME: this needs | ||
6184 | * to be handled by a | ||
6185 | * middle-layer. | ||
6186 | */ | ||
6187 | { -1, -EIO } | 6462 | { -1, -EIO } |
6188 | }; | 6463 | }; |
6189 | 6464 | ||
@@ -6258,6 +6533,7 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
6258 | PROC(SETACL, enc_setacl, dec_setacl), | 6533 | PROC(SETACL, enc_setacl, dec_setacl), |
6259 | PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), | 6534 | PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), |
6260 | PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), | 6535 | PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), |
6536 | PROC(SECINFO, enc_secinfo, dec_secinfo), | ||
6261 | #if defined(CONFIG_NFS_V4_1) | 6537 | #if defined(CONFIG_NFS_V4_1) |
6262 | PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), | 6538 | PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), |
6263 | PROC(CREATE_SESSION, enc_create_session, dec_create_session), | 6539 | PROC(CREATE_SESSION, enc_create_session, dec_create_session), |
@@ -6267,6 +6543,7 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
6267 | PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), | 6543 | PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), |
6268 | PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), | 6544 | PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), |
6269 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), | 6545 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), |
6546 | PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), | ||
6270 | #endif /* CONFIG_NFS_V4_1 */ | 6547 | #endif /* CONFIG_NFS_V4_1 */ |
6271 | }; | 6548 | }; |
6272 | 6549 | ||
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 23e794410669..87a593c2b055 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c | |||
@@ -223,6 +223,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, | |||
223 | desc->pg_count = 0; | 223 | desc->pg_count = 0; |
224 | desc->pg_bsize = bsize; | 224 | desc->pg_bsize = bsize; |
225 | desc->pg_base = 0; | 225 | desc->pg_base = 0; |
226 | desc->pg_moreio = 0; | ||
226 | desc->pg_inode = inode; | 227 | desc->pg_inode = inode; |
227 | desc->pg_doio = doio; | 228 | desc->pg_doio = doio; |
228 | desc->pg_ioflags = io_flags; | 229 | desc->pg_ioflags = io_flags; |
@@ -335,9 +336,11 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, | |||
335 | struct nfs_page *req) | 336 | struct nfs_page *req) |
336 | { | 337 | { |
337 | while (!nfs_pageio_do_add_request(desc, req)) { | 338 | while (!nfs_pageio_do_add_request(desc, req)) { |
339 | desc->pg_moreio = 1; | ||
338 | nfs_pageio_doio(desc); | 340 | nfs_pageio_doio(desc); |
339 | if (desc->pg_error < 0) | 341 | if (desc->pg_error < 0) |
340 | return 0; | 342 | return 0; |
343 | desc->pg_moreio = 0; | ||
341 | } | 344 | } |
342 | return 1; | 345 | return 1; |
343 | } | 346 | } |
@@ -395,6 +398,7 @@ int nfs_scan_list(struct nfs_inode *nfsi, | |||
395 | pgoff_t idx_end; | 398 | pgoff_t idx_end; |
396 | int found, i; | 399 | int found, i; |
397 | int res; | 400 | int res; |
401 | struct list_head *list; | ||
398 | 402 | ||
399 | res = 0; | 403 | res = 0; |
400 | if (npages == 0) | 404 | if (npages == 0) |
@@ -415,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi, | |||
415 | idx_start = req->wb_index + 1; | 419 | idx_start = req->wb_index + 1; |
416 | if (nfs_set_page_tag_locked(req)) { | 420 | if (nfs_set_page_tag_locked(req)) { |
417 | kref_get(&req->wb_kref); | 421 | kref_get(&req->wb_kref); |
418 | nfs_list_remove_request(req); | ||
419 | radix_tree_tag_clear(&nfsi->nfs_page_tree, | 422 | radix_tree_tag_clear(&nfsi->nfs_page_tree, |
420 | req->wb_index, tag); | 423 | req->wb_index, tag); |
421 | nfs_list_add_request(req, dst); | 424 | list = pnfs_choose_commit_list(req, dst); |
425 | nfs_list_add_request(req, list); | ||
422 | res++; | 426 | res++; |
423 | if (res == INT_MAX) | 427 | if (res == INT_MAX) |
424 | goto out; | 428 | goto out; |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f38813a0a295..d9ab97269ce6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -259,6 +259,7 @@ put_lseg(struct pnfs_layout_segment *lseg) | |||
259 | pnfs_free_lseg_list(&free_me); | 259 | pnfs_free_lseg_list(&free_me); |
260 | } | 260 | } |
261 | } | 261 | } |
262 | EXPORT_SYMBOL_GPL(put_lseg); | ||
262 | 263 | ||
263 | static bool | 264 | static bool |
264 | should_free_lseg(u32 lseg_iomode, u32 recall_iomode) | 265 | should_free_lseg(u32 lseg_iomode, u32 recall_iomode) |
@@ -471,6 +472,9 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
471 | struct nfs_server *server = NFS_SERVER(ino); | 472 | struct nfs_server *server = NFS_SERVER(ino); |
472 | struct nfs4_layoutget *lgp; | 473 | struct nfs4_layoutget *lgp; |
473 | struct pnfs_layout_segment *lseg = NULL; | 474 | struct pnfs_layout_segment *lseg = NULL; |
475 | struct page **pages = NULL; | ||
476 | int i; | ||
477 | u32 max_resp_sz, max_pages; | ||
474 | 478 | ||
475 | dprintk("--> %s\n", __func__); | 479 | dprintk("--> %s\n", __func__); |
476 | 480 | ||
@@ -478,6 +482,21 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
478 | lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); | 482 | lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); |
479 | if (lgp == NULL) | 483 | if (lgp == NULL) |
480 | return NULL; | 484 | return NULL; |
485 | |||
486 | /* allocate pages for xdr post processing */ | ||
487 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
488 | max_pages = max_resp_sz >> PAGE_SHIFT; | ||
489 | |||
490 | pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); | ||
491 | if (!pages) | ||
492 | goto out_err_free; | ||
493 | |||
494 | for (i = 0; i < max_pages; i++) { | ||
495 | pages[i] = alloc_page(GFP_KERNEL); | ||
496 | if (!pages[i]) | ||
497 | goto out_err_free; | ||
498 | } | ||
499 | |||
481 | lgp->args.minlength = NFS4_MAX_UINT64; | 500 | lgp->args.minlength = NFS4_MAX_UINT64; |
482 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; | 501 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; |
483 | lgp->args.range.iomode = iomode; | 502 | lgp->args.range.iomode = iomode; |
@@ -486,6 +505,8 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
486 | lgp->args.type = server->pnfs_curr_ld->id; | 505 | lgp->args.type = server->pnfs_curr_ld->id; |
487 | lgp->args.inode = ino; | 506 | lgp->args.inode = ino; |
488 | lgp->args.ctx = get_nfs_open_context(ctx); | 507 | lgp->args.ctx = get_nfs_open_context(ctx); |
508 | lgp->args.layout.pages = pages; | ||
509 | lgp->args.layout.pglen = max_pages * PAGE_SIZE; | ||
489 | lgp->lsegpp = &lseg; | 510 | lgp->lsegpp = &lseg; |
490 | 511 | ||
491 | /* Synchronously retrieve layout information from server and | 512 | /* Synchronously retrieve layout information from server and |
@@ -496,7 +517,26 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
496 | /* remember that LAYOUTGET failed and suspend trying */ | 517 | /* remember that LAYOUTGET failed and suspend trying */ |
497 | set_bit(lo_fail_bit(iomode), &lo->plh_flags); | 518 | set_bit(lo_fail_bit(iomode), &lo->plh_flags); |
498 | } | 519 | } |
520 | |||
521 | /* free xdr pages */ | ||
522 | for (i = 0; i < max_pages; i++) | ||
523 | __free_page(pages[i]); | ||
524 | kfree(pages); | ||
525 | |||
499 | return lseg; | 526 | return lseg; |
527 | |||
528 | out_err_free: | ||
529 | /* free any allocated xdr pages and lgp, as it is unused */ | ||
530 | if (pages) { | ||
531 | for (i = 0; i < max_pages; i++) { | ||
532 | if (!pages[i]) | ||
533 | break; | ||
534 | __free_page(pages[i]); | ||
535 | } | ||
536 | kfree(pages); | ||
537 | } | ||
538 | kfree(lgp); | ||
539 | return NULL; | ||
500 | } | 540 | } |
501 | 541 | ||
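send_layoutget now preallocates enough pages to hold the largest possible reply, max_resp_sz from the session's fore-channel attributes divided into PAGE_SIZE chunks, and unwinds cleanly if any allocation fails. A userspace sketch of the sizing and unwind logic, with malloc standing in for alloc_page and an assumed 64 KiB max_resp_sz:

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1u << PAGE_SHIFT)

int main(void)
{
	unsigned max_resp_sz = 65536;              /* from session attrs */
	unsigned max_pages = max_resp_sz >> PAGE_SHIFT;
	void **pages = calloc(max_pages, sizeof(*pages));
	unsigned i;

	if (!pages)
		return 1;
	for (i = 0; i < max_pages; i++) {
		pages[i] = malloc(PAGE_SIZE);      /* alloc_page() stand-in */
		if (!pages[i])
			goto out_free;             /* unwind what we got */
	}
	printf("reply can span %u pages (%u bytes)\n",
	       max_pages, max_pages * PAGE_SIZE);
out_free:
	for (i = 0; i < max_pages && pages[i]; i++)
		free(pages[i]);
	free(pages);
	return 0;
}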
502 | bool pnfs_roc(struct inode *ino) | 542 | bool pnfs_roc(struct inode *ino) |
@@ -945,3 +985,105 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, | |||
945 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); | 985 | dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs); |
946 | return trypnfs; | 986 | return trypnfs; |
947 | } | 987 | } |
988 | |||
989 | /* | ||
990 | * Currently there is only one (whole file) write lseg. | ||
991 | */ | ||
992 | static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode) | ||
993 | { | ||
994 | struct pnfs_layout_segment *lseg, *rv = NULL; | ||
995 | |||
996 | list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list) | ||
997 | if (lseg->pls_range.iomode == IOMODE_RW) | ||
998 | rv = lseg; | ||
999 | return rv; | ||
1000 | } | ||
1001 | |||
1002 | void | ||
1003 | pnfs_set_layoutcommit(struct nfs_write_data *wdata) | ||
1004 | { | ||
1005 | struct nfs_inode *nfsi = NFS_I(wdata->inode); | ||
1006 | loff_t end_pos = wdata->args.offset + wdata->res.count; | ||
1007 | |||
1008 | spin_lock(&nfsi->vfs_inode.i_lock); | ||
1009 | if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | ||
1010 | /* references matched in nfs4_layoutcommit_release */ | ||
1011 | get_lseg(wdata->lseg); | ||
1012 | wdata->lseg->pls_lc_cred = | ||
1013 | get_rpccred(wdata->args.context->state->owner->so_cred); | ||
1014 | mark_inode_dirty_sync(wdata->inode); | ||
1015 | dprintk("%s: Set layoutcommit for inode %lu ", | ||
1016 | __func__, wdata->inode->i_ino); | ||
1017 | } | ||
1018 | if (end_pos > wdata->lseg->pls_end_pos) | ||
1019 | wdata->lseg->pls_end_pos = end_pos; | ||
1020 | spin_unlock(&nfsi->vfs_inode.i_lock); | ||
1021 | } | ||
1022 | EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); | ||
1023 | |||
1024 | /* | ||
1025 | * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and | ||
1026 | * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough | ||
1027 | * data to disk to allow the server to recover the data if it crashes. | ||
1028 | * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag | ||
1029 | * is off and a COMMIT is sent to a data server, or when WRITEs to | ||
1030 | * a data server return NFS_DATA_SYNC. | ||
1031 | */ | ||
1032 | int | ||
1033 | pnfs_layoutcommit_inode(struct inode *inode, bool sync) | ||
1034 | { | ||
1035 | struct nfs4_layoutcommit_data *data; | ||
1036 | struct nfs_inode *nfsi = NFS_I(inode); | ||
1037 | struct pnfs_layout_segment *lseg; | ||
1038 | struct rpc_cred *cred; | ||
1039 | loff_t end_pos; | ||
1040 | int status = 0; | ||
1041 | |||
1042 | dprintk("--> %s inode %lu\n", __func__, inode->i_ino); | ||
1043 | |||
1044 | if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) | ||
1045 | return 0; | ||
1046 | |||
1047 | /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */ | ||
1048 | data = kzalloc(sizeof(*data), GFP_NOFS); | ||
1049 | if (!data) { | ||
1050 | mark_inode_dirty_sync(inode); | ||
1051 | status = -ENOMEM; | ||
1052 | goto out; | ||
1053 | } | ||
1054 | |||
1055 | spin_lock(&inode->i_lock); | ||
1056 | if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { | ||
1057 | spin_unlock(&inode->i_lock); | ||
1058 | kfree(data); | ||
1059 | goto out; | ||
1060 | } | ||
1061 | /* | ||
1062 | * Currently there is only one (whole file) write lseg, which is | ||
1063 | * referenced in pnfs_set_layoutcommit and will be found here. | ||
1064 | */ | ||
1065 | lseg = pnfs_list_write_lseg(inode); | ||
1066 | |||
1067 | end_pos = lseg->pls_end_pos; | ||
1068 | cred = lseg->pls_lc_cred; | ||
1069 | lseg->pls_end_pos = 0; | ||
1070 | lseg->pls_lc_cred = NULL; | ||
1071 | |||
1072 | memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data, | ||
1073 | sizeof(nfsi->layout->plh_stateid.data)); | ||
1074 | spin_unlock(&inode->i_lock); | ||
1075 | |||
1076 | data->args.inode = inode; | ||
1077 | data->lseg = lseg; | ||
1078 | data->cred = cred; | ||
1079 | nfs_fattr_init(&data->fattr); | ||
1080 | data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask; | ||
1081 | data->res.fattr = &data->fattr; | ||
1082 | data->args.lastbytewritten = end_pos - 1; | ||
1083 | data->res.server = NFS_SERVER(inode); | ||
1084 | |||
1085 | status = nfs4_proc_layoutcommit(data, sync); | ||
1086 | out: | ||
1087 | dprintk("<-- %s status %d\n", __func__, status); | ||
1088 | return status; | ||
1089 | } | ||
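pnfs_layoutcommit_inode is gated by test_and_clear_bit(NFS_INO_LAYOUTCOMMIT) under i_lock, so concurrent callers race for the bit and exactly one of them sends the LAYOUTCOMMIT. A minimal sketch of that claim pattern using C11 atomics in place of the kernel bitops:

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint flags;
#define INO_LAYOUTCOMMIT 1u

/* Only the caller that clears the bit issues the LAYOUTCOMMIT. */
static int claim_layoutcommit(void)
{
	unsigned old = atomic_fetch_and(&flags, ~INO_LAYOUTCOMMIT);
	return (old & INO_LAYOUTCOMMIT) != 0;
}

int main(void)
{
	atomic_fetch_or(&flags, INO_LAYOUTCOMMIT); /* set by a pNFS write */
	printf("first claim: %d\n", claim_layoutcommit());  /* 1: we commit */
	printf("second claim: %d\n", claim_layoutcommit()); /* 0: nothing to do */
	return 0;
}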
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 6380b9405bcd..bc4827202e7a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -43,6 +43,8 @@ struct pnfs_layout_segment { | |||
43 | atomic_t pls_refcount; | 43 | atomic_t pls_refcount; |
44 | unsigned long pls_flags; | 44 | unsigned long pls_flags; |
45 | struct pnfs_layout_hdr *pls_layout; | 45 | struct pnfs_layout_hdr *pls_layout; |
46 | struct rpc_cred *pls_lc_cred; /* LAYOUTCOMMIT credential */ | ||
47 | loff_t pls_end_pos; /* LAYOUTCOMMIT write end */ | ||
46 | }; | 48 | }; |
47 | 49 | ||
48 | enum pnfs_try_status { | 50 | enum pnfs_try_status { |
@@ -74,6 +76,13 @@ struct pnfs_layoutdriver_type { | |||
74 | /* test for nfs page cache coalescing */ | 76 | /* test for nfs page cache coalescing */ |
75 | int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); | 77 | int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); |
76 | 78 | ||
79 | /* Returns true if layoutdriver wants to divert this request to | ||
80 | * driver's commit routine. | ||
81 | */ | ||
82 | bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg); | ||
83 | struct list_head * (*choose_commit_list) (struct nfs_page *req); | ||
84 | int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how); | ||
85 | |||
77 | /* | 86 | /* |
78 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted | 87 | * Return PNFS_ATTEMPTED to indicate the layout code has attempted |
79 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS | 88 | * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS |
@@ -100,7 +109,6 @@ struct pnfs_device { | |||
100 | unsigned int layout_type; | 109 | unsigned int layout_type; |
101 | unsigned int mincount; | 110 | unsigned int mincount; |
102 | struct page **pages; | 111 | struct page **pages; |
103 | void *area; | ||
104 | unsigned int pgbase; | 112 | unsigned int pgbase; |
105 | unsigned int pglen; | 113 | unsigned int pglen; |
106 | }; | 114 | }; |
@@ -145,7 +153,8 @@ bool pnfs_roc(struct inode *ino); | |||
145 | void pnfs_roc_release(struct inode *ino); | 153 | void pnfs_roc_release(struct inode *ino); |
146 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); | 154 | void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); |
147 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); | 155 | bool pnfs_roc_drain(struct inode *ino, u32 *barrier); |
148 | 156 | void pnfs_set_layoutcommit(struct nfs_write_data *wdata); | |
157 | int pnfs_layoutcommit_inode(struct inode *inode, bool sync); | ||
149 | 158 | ||
150 | static inline int lo_fail_bit(u32 iomode) | 159 | static inline int lo_fail_bit(u32 iomode) |
151 | { | 160 | { |
@@ -169,6 +178,51 @@ static inline int pnfs_enabled_sb(struct nfs_server *nfss) | |||
169 | return nfss->pnfs_curr_ld != NULL; | 178 | return nfss->pnfs_curr_ld != NULL; |
170 | } | 179 | } |
171 | 180 | ||
181 | static inline void | ||
182 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) | ||
183 | { | ||
184 | if (lseg) { | ||
185 | struct pnfs_layoutdriver_type *ld; | ||
186 | |||
187 | ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld; | ||
188 | if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) { | ||
189 | set_bit(PG_PNFS_COMMIT, &req->wb_flags); | ||
190 | req->wb_commit_lseg = get_lseg(lseg); | ||
191 | } | ||
192 | } | ||
193 | } | ||
194 | |||
195 | static inline int | ||
196 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) | ||
197 | { | ||
198 | if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags)) | ||
199 | return PNFS_NOT_ATTEMPTED; | ||
200 | return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how); | ||
201 | } | ||
202 | |||
203 | static inline struct list_head * | ||
204 | pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) | ||
205 | { | ||
206 | struct list_head *rv; | ||
207 | |||
208 | if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) { | ||
209 | struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode; | ||
210 | |||
211 | set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags); | ||
212 | rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req); | ||
213 | /* matched by ref taken when PG_PNFS_COMMIT is set */ | ||
214 | put_lseg(req->wb_commit_lseg); | ||
215 | } else | ||
216 | rv = mds; | ||
217 | return rv; | ||
218 | } | ||
219 | |||
220 | static inline void pnfs_clear_request_commit(struct nfs_page *req) | ||
221 | { | ||
222 | if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) | ||
223 | put_lseg(req->wb_commit_lseg); | ||
224 | } | ||
225 | |||
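pnfs_choose_commit_list lets the layout driver divert a request from the MDS commit list to its own per-data-server list whenever PG_PNFS_COMMIT was set at write time. A toy sketch of the routing decision, with a plain flag standing in for the page bit and the driver callback:

#include <stdio.h>

struct list_head { struct list_head *next; };
struct req { int pnfs; };

static struct list_head mds_list, ds_list;

/* Route a request to the layout driver's list or fall back to the MDS. */
static struct list_head *choose_commit_list(struct req *r,
					    struct list_head *mds)
{
	return r->pnfs ? &ds_list : mds;
}

int main(void)
{
	struct req a = { .pnfs = 1 }, b = { .pnfs = 0 };
	printf("a -> %s\n",
	       choose_commit_list(&a, &mds_list) == &ds_list ? "DS" : "MDS");
	printf("b -> %s\n",
	       choose_commit_list(&b, &mds_list) == &ds_list ? "DS" : "MDS");
	return 0;
}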
172 | #else /* CONFIG_NFS_V4_1 */ | 226 | #else /* CONFIG_NFS_V4_1 */ |
173 | 227 | ||
174 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) | 228 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) |
@@ -252,6 +306,31 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino) | |||
252 | pgio->pg_test = NULL; | 306 | pgio->pg_test = NULL; |
253 | } | 307 | } |
254 | 308 | ||
309 | static inline void | ||
310 | pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) | ||
311 | { | ||
312 | } | ||
313 | |||
314 | static inline int | ||
315 | pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how) | ||
316 | { | ||
317 | return PNFS_NOT_ATTEMPTED; | ||
318 | } | ||
319 | |||
320 | static inline struct list_head * | ||
321 | pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds) | ||
322 | { | ||
323 | return mds; | ||
324 | } | ||
325 | |||
326 | static inline void pnfs_clear_request_commit(struct nfs_page *req) | ||
327 | { | ||
328 | } | ||
329 | |||
330 | static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) | ||
331 | { | ||
332 | return 0; | ||
333 | } | ||
255 | #endif /* CONFIG_NFS_V4_1 */ | 334 | #endif /* CONFIG_NFS_V4_1 */ |
256 | 335 | ||
257 | #endif /* FS_NFS_PNFS_H */ | 336 | #endif /* FS_NFS_PNFS_H */ |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index b8ec170f2a0f..ac40b8535d7e 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -177,7 +177,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
177 | } | 177 | } |
178 | 178 | ||
179 | static int | 179 | static int |
180 | nfs_proc_lookup(struct inode *dir, struct qstr *name, | 180 | nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, |
181 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) | 181 | struct nfs_fh *fhandle, struct nfs_fattr *fattr) |
182 | { | 182 | { |
183 | struct nfs_diropargs arg = { | 183 | struct nfs_diropargs arg = { |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 47a3ad63e0d5..85d75254328e 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -59,6 +59,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void) | |||
59 | } | 59 | } |
60 | return p; | 60 | return p; |
61 | } | 61 | } |
62 | EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); | ||
62 | 63 | ||
63 | void nfs_commit_free(struct nfs_write_data *p) | 64 | void nfs_commit_free(struct nfs_write_data *p) |
64 | { | 65 | { |
@@ -66,6 +67,7 @@ void nfs_commit_free(struct nfs_write_data *p) | |||
66 | kfree(p->pagevec); | 67 | kfree(p->pagevec); |
67 | mempool_free(p, nfs_commit_mempool); | 68 | mempool_free(p, nfs_commit_mempool); |
68 | } | 69 | } |
70 | EXPORT_SYMBOL_GPL(nfs_commit_free); | ||
69 | 71 | ||
70 | struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) | 72 | struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) |
71 | { | 73 | { |
@@ -179,8 +181,8 @@ static int wb_priority(struct writeback_control *wbc) | |||
179 | if (wbc->for_reclaim) | 181 | if (wbc->for_reclaim) |
180 | return FLUSH_HIGHPRI | FLUSH_STABLE; | 182 | return FLUSH_HIGHPRI | FLUSH_STABLE; |
181 | if (wbc->for_kupdate || wbc->for_background) | 183 | if (wbc->for_kupdate || wbc->for_background) |
182 | return FLUSH_LOWPRI; | 184 | return FLUSH_LOWPRI | FLUSH_COND_STABLE; |
183 | return 0; | 185 | return FLUSH_COND_STABLE; |
184 | } | 186 | } |
185 | 187 | ||
186 | /* | 188 | /* |
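Both writeback priorities now carry FLUSH_COND_STABLE: a hint that, if the flush ends up fitting in a single WRITE with nothing else waiting for COMMIT, the client should upgrade it to a stable write and save the COMMIT round trip. Condensed from the nfs_write_rpcsetup() hunk further down (illustrative, not new code):

    data->args.stable = NFS_UNSTABLE;
    if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
            data->args.stable = NFS_DATA_SYNC;
            if (!nfs_need_commit(NFS_I(inode)))
                    /* no commits outstanding: make it fully durable */
                    data->args.stable = NFS_FILE_SYNC;
    }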
@@ -441,7 +443,7 @@ nfs_mark_request_dirty(struct nfs_page *req) | |||
441 | * Add a request to the inode's commit list. | 443 | * Add a request to the inode's commit list. |
442 | */ | 444 | */ |
443 | static void | 445 | static void |
444 | nfs_mark_request_commit(struct nfs_page *req) | 446 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) |
445 | { | 447 | { |
446 | struct inode *inode = req->wb_context->path.dentry->d_inode; | 448 | struct inode *inode = req->wb_context->path.dentry->d_inode; |
447 | struct nfs_inode *nfsi = NFS_I(inode); | 449 | struct nfs_inode *nfsi = NFS_I(inode); |
@@ -453,6 +455,7 @@ nfs_mark_request_commit(struct nfs_page *req) | |||
453 | NFS_PAGE_TAG_COMMIT); | 455 | NFS_PAGE_TAG_COMMIT); |
454 | nfsi->ncommit++; | 456 | nfsi->ncommit++; |
455 | spin_unlock(&inode->i_lock); | 457 | spin_unlock(&inode->i_lock); |
458 | pnfs_mark_request_commit(req, lseg); | ||
456 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | 459 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); |
457 | inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); | 460 | inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); |
458 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); | 461 | __mark_inode_dirty(inode, I_DIRTY_DATASYNC); |
@@ -474,14 +477,18 @@ nfs_clear_request_commit(struct nfs_page *req) | |||
474 | static inline | 477 | static inline |
475 | int nfs_write_need_commit(struct nfs_write_data *data) | 478 | int nfs_write_need_commit(struct nfs_write_data *data) |
476 | { | 479 | { |
477 | return data->verf.committed != NFS_FILE_SYNC; | 480 | if (data->verf.committed == NFS_DATA_SYNC) |
481 | return data->lseg == NULL; | ||
482 | else | ||
483 | return data->verf.committed != NFS_FILE_SYNC; | ||
478 | } | 484 | } |
479 | 485 | ||
480 | static inline | 486 | static inline |
481 | int nfs_reschedule_unstable_write(struct nfs_page *req) | 487 | int nfs_reschedule_unstable_write(struct nfs_page *req, |
488 | struct nfs_write_data *data) | ||
482 | { | 489 | { |
483 | if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { | 490 | if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { |
484 | nfs_mark_request_commit(req); | 491 | nfs_mark_request_commit(req, data->lseg); |
485 | return 1; | 492 | return 1; |
486 | } | 493 | } |
487 | if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { | 494 | if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { |
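With the layout segment threaded through, nfs_write_need_commit() can tell a DATA_SYNC reply from the MDS (which still needs a COMMIT) apart from one that went through a pNFS data server (where the layout driver owns the commit). The decision table implied by the hunk above:

    /*
     *  verf.committed | data->lseg | COMMIT needed?
     *  ---------------+------------+-------------------------------------
     *  NFS_FILE_SYNC  | any        | no  - data and metadata are durable
     *  NFS_DATA_SYNC  | NULL       | yes - plain MDS write
     *  NFS_DATA_SYNC  | non-NULL   | no  - DS write; layout driver commits
     *  NFS_UNSTABLE   | any        | yes
     */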
@@ -492,7 +499,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req) | |||
492 | } | 499 | } |
493 | #else | 500 | #else |
494 | static inline void | 501 | static inline void |
495 | nfs_mark_request_commit(struct nfs_page *req) | 502 | nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) |
496 | { | 503 | { |
497 | } | 504 | } |
498 | 505 | ||
@@ -509,7 +516,8 @@ int nfs_write_need_commit(struct nfs_write_data *data) | |||
509 | } | 516 | } |
510 | 517 | ||
511 | static inline | 518 | static inline |
512 | int nfs_reschedule_unstable_write(struct nfs_page *req) | 519 | int nfs_reschedule_unstable_write(struct nfs_page *req, |
520 | struct nfs_write_data *data) | ||
513 | { | 521 | { |
514 | return 0; | 522 | return 0; |
515 | } | 523 | } |
@@ -612,9 +620,11 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, | |||
612 | } | 620 | } |
613 | 621 | ||
614 | if (nfs_clear_request_commit(req) && | 622 | if (nfs_clear_request_commit(req) && |
615 | radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, | 623 | radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, |
616 | req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) | 624 | req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) { |
617 | NFS_I(inode)->ncommit--; | 625 | NFS_I(inode)->ncommit--; |
626 | pnfs_clear_request_commit(req); | ||
627 | } | ||
618 | 628 | ||
619 | /* Okay, the request matches. Update the region */ | 629 | /* Okay, the request matches. Update the region */ |
620 | if (offset < req->wb_offset) { | 630 | if (offset < req->wb_offset) { |
@@ -762,11 +772,12 @@ int nfs_updatepage(struct file *file, struct page *page, | |||
762 | return status; | 772 | return status; |
763 | } | 773 | } |
764 | 774 | ||
765 | static void nfs_writepage_release(struct nfs_page *req) | 775 | static void nfs_writepage_release(struct nfs_page *req, |
776 | struct nfs_write_data *data) | ||
766 | { | 777 | { |
767 | struct page *page = req->wb_page; | 778 | struct page *page = req->wb_page; |
768 | 779 | ||
769 | if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) | 780 | if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data)) |
770 | nfs_inode_remove_request(req); | 781 | nfs_inode_remove_request(req); |
771 | nfs_clear_page_tag_locked(req); | 782 | nfs_clear_page_tag_locked(req); |
772 | nfs_end_page_writeback(page); | 783 | nfs_end_page_writeback(page); |
@@ -863,7 +874,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req, | |||
863 | data->args.context = get_nfs_open_context(req->wb_context); | 874 | data->args.context = get_nfs_open_context(req->wb_context); |
864 | data->args.lock_context = req->wb_lock_context; | 875 | data->args.lock_context = req->wb_lock_context; |
865 | data->args.stable = NFS_UNSTABLE; | 876 | data->args.stable = NFS_UNSTABLE; |
866 | if (how & FLUSH_STABLE) { | 877 | if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { |
867 | data->args.stable = NFS_DATA_SYNC; | 878 | data->args.stable = NFS_DATA_SYNC; |
868 | if (!nfs_need_commit(NFS_I(inode))) | 879 | if (!nfs_need_commit(NFS_I(inode))) |
869 | data->args.stable = NFS_FILE_SYNC; | 880 | data->args.stable = NFS_FILE_SYNC; |
@@ -912,6 +923,12 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) | |||
912 | 923 | ||
913 | nfs_list_remove_request(req); | 924 | nfs_list_remove_request(req); |
914 | 925 | ||
926 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | ||
927 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit || | ||
928 | desc->pg_count > wsize)) | ||
929 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | ||
930 | |||
931 | |||
915 | nbytes = desc->pg_count; | 932 | nbytes = desc->pg_count; |
916 | do { | 933 | do { |
917 | size_t len = min(nbytes, wsize); | 934 | size_t len = min(nbytes, wsize); |
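Before any WRITE is built, the flush paths revoke the FLUSH_COND_STABLE hint whenever one stable WRITE could not cover everything. Expressed as a predicate (a sketch; wsize is the server write size as used above):

    /* Sketch: the hint survives only if a single WRITE finishes the job
     * and no other request is already waiting for a COMMIT. */
    static bool keep_cond_stable(struct nfs_pageio_descriptor *desc,
                                 size_t wsize)
    {
            return !desc->pg_moreio &&                 /* no more I/O coming */
                   !NFS_I(desc->pg_inode)->ncommit &&  /* commit list empty */
                   desc->pg_count <= wsize;            /* fits in one RPC */
    }

The next hunk applies the same test in nfs_flush_one(), minus the size term, since that path always issues a single RPC.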
@@ -1002,6 +1019,10 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) | |||
1002 | if ((!lseg) && list_is_singular(&data->pages)) | 1019 | if ((!lseg) && list_is_singular(&data->pages)) |
1003 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); | 1020 | lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); |
1004 | 1021 | ||
1022 | if ((desc->pg_ioflags & FLUSH_COND_STABLE) && | ||
1023 | (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) | ||
1024 | desc->pg_ioflags &= ~FLUSH_COND_STABLE; | ||
1025 | |||
1005 | /* Set up the argument struct */ | 1026 | /* Set up the argument struct */ |
1006 | ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); | 1027 | ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); |
1007 | out: | 1028 | out: |
@@ -1074,7 +1095,7 @@ static void nfs_writeback_release_partial(void *calldata) | |||
1074 | 1095 | ||
1075 | out: | 1096 | out: |
1076 | if (atomic_dec_and_test(&req->wb_complete)) | 1097 | if (atomic_dec_and_test(&req->wb_complete)) |
1077 | nfs_writepage_release(req); | 1098 | nfs_writepage_release(req, data); |
1078 | nfs_writedata_release(calldata); | 1099 | nfs_writedata_release(calldata); |
1079 | } | 1100 | } |
1080 | 1101 | ||
@@ -1141,7 +1162,7 @@ static void nfs_writeback_release_full(void *calldata) | |||
1141 | 1162 | ||
1142 | if (nfs_write_need_commit(data)) { | 1163 | if (nfs_write_need_commit(data)) { |
1143 | memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); | 1164 | memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); |
1144 | nfs_mark_request_commit(req); | 1165 | nfs_mark_request_commit(req, data->lseg); |
1145 | dprintk(" marked for commit\n"); | 1166 | dprintk(" marked for commit\n"); |
1146 | goto next; | 1167 | goto next; |
1147 | } | 1168 | } |
@@ -1251,57 +1272,82 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) | |||
1251 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 1272 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
1252 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) | 1273 | static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) |
1253 | { | 1274 | { |
1275 | int ret; | ||
1276 | |||
1254 | if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) | 1277 | if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) |
1255 | return 1; | 1278 | return 1; |
1256 | if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, | 1279 | if (!may_wait) |
1257 | NFS_INO_COMMIT, nfs_wait_bit_killable, | 1280 | return 0; |
1258 | TASK_KILLABLE)) | 1281 | ret = out_of_line_wait_on_bit_lock(&nfsi->flags, |
1259 | return 1; | 1282 | NFS_INO_COMMIT, |
1260 | return 0; | 1283 | nfs_wait_bit_killable, |
1284 | TASK_KILLABLE); | ||
1285 | return (ret < 0) ? ret : 1; | ||
1261 | } | 1286 | } |
1262 | 1287 | ||
1263 | static void nfs_commit_clear_lock(struct nfs_inode *nfsi) | 1288 | void nfs_commit_clear_lock(struct nfs_inode *nfsi) |
1264 | { | 1289 | { |
1265 | clear_bit(NFS_INO_COMMIT, &nfsi->flags); | 1290 | clear_bit(NFS_INO_COMMIT, &nfsi->flags); |
1266 | smp_mb__after_clear_bit(); | 1291 | smp_mb__after_clear_bit(); |
1267 | wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); | 1292 | wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); |
1268 | } | 1293 | } |
1294 | EXPORT_SYMBOL_GPL(nfs_commit_clear_lock); | ||
1269 | 1295 | ||
1270 | 1296 | void nfs_commitdata_release(void *data) | |
1271 | static void nfs_commitdata_release(void *data) | ||
1272 | { | 1297 | { |
1273 | struct nfs_write_data *wdata = data; | 1298 | struct nfs_write_data *wdata = data; |
1274 | 1299 | ||
1300 | put_lseg(wdata->lseg); | ||
1275 | put_nfs_open_context(wdata->args.context); | 1301 | put_nfs_open_context(wdata->args.context); |
1276 | nfs_commit_free(wdata); | 1302 | nfs_commit_free(wdata); |
1277 | } | 1303 | } |
1304 | EXPORT_SYMBOL_GPL(nfs_commitdata_release); | ||
1278 | 1305 | ||
1279 | /* | 1306 | int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt, |
1280 | * Set up the argument/result storage required for the RPC call. | 1307 | const struct rpc_call_ops *call_ops, |
1281 | */ | 1308 | int how) |
1282 | static int nfs_commit_rpcsetup(struct list_head *head, | ||
1283 | struct nfs_write_data *data, | ||
1284 | int how) | ||
1285 | { | 1309 | { |
1286 | struct nfs_page *first = nfs_list_entry(head->next); | ||
1287 | struct inode *inode = first->wb_context->path.dentry->d_inode; | ||
1288 | int priority = flush_task_priority(how); | ||
1289 | struct rpc_task *task; | 1310 | struct rpc_task *task; |
1311 | int priority = flush_task_priority(how); | ||
1290 | struct rpc_message msg = { | 1312 | struct rpc_message msg = { |
1291 | .rpc_argp = &data->args, | 1313 | .rpc_argp = &data->args, |
1292 | .rpc_resp = &data->res, | 1314 | .rpc_resp = &data->res, |
1293 | .rpc_cred = first->wb_context->cred, | 1315 | .rpc_cred = data->cred, |
1294 | }; | 1316 | }; |
1295 | struct rpc_task_setup task_setup_data = { | 1317 | struct rpc_task_setup task_setup_data = { |
1296 | .task = &data->task, | 1318 | .task = &data->task, |
1297 | .rpc_client = NFS_CLIENT(inode), | 1319 | .rpc_client = clnt, |
1298 | .rpc_message = &msg, | 1320 | .rpc_message = &msg, |
1299 | .callback_ops = &nfs_commit_ops, | 1321 | .callback_ops = call_ops, |
1300 | .callback_data = data, | 1322 | .callback_data = data, |
1301 | .workqueue = nfsiod_workqueue, | 1323 | .workqueue = nfsiod_workqueue, |
1302 | .flags = RPC_TASK_ASYNC, | 1324 | .flags = RPC_TASK_ASYNC, |
1303 | .priority = priority, | 1325 | .priority = priority, |
1304 | }; | 1326 | }; |
1327 | /* Set up the initial task struct. */ | ||
1328 | NFS_PROTO(data->inode)->commit_setup(data, &msg); | ||
1329 | |||
1330 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); | ||
1331 | |||
1332 | task = rpc_run_task(&task_setup_data); | ||
1333 | if (IS_ERR(task)) | ||
1334 | return PTR_ERR(task); | ||
1335 | if (how & FLUSH_SYNC) | ||
1336 | rpc_wait_for_completion_task(task); | ||
1337 | rpc_put_task(task); | ||
1338 | return 0; | ||
1339 | } | ||
1340 | EXPORT_SYMBOL_GPL(nfs_initiate_commit); | ||
1341 | |||
1342 | /* | ||
1343 | * Set up the argument/result storage required for the RPC call. | ||
1344 | */ | ||
1345 | void nfs_init_commit(struct nfs_write_data *data, | ||
1346 | struct list_head *head, | ||
1347 | struct pnfs_layout_segment *lseg) | ||
1348 | { | ||
1349 | struct nfs_page *first = nfs_list_entry(head->next); | ||
1350 | struct inode *inode = first->wb_context->path.dentry->d_inode; | ||
1305 | 1351 | ||
1306 | /* Set up the RPC argument and reply structs | 1352 | /* Set up the RPC argument and reply structs |
1307 | * NB: take care not to mess about with data->commit et al. */ | 1353 | * NB: take care not to mess about with data->commit et al. */ |
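Splitting the old nfs_commit_rpcsetup() into nfs_init_commit() (fill in the argument/result storage) and nfs_initiate_commit() (run the task) is what lets a pNFS layout driver send a COMMIT over its own data-server connection. A hedged sketch of such a caller; ds_clnt and ds_commit_ops are illustrative names, not identifiers from this patch:

    /* Hypothetical layout-driver path: same engine, different transport
     * and completion callbacks than the MDS case. */
    static int ds_send_commit(struct nfs_write_data *data,
                              struct rpc_clnt *ds_clnt,
                              const struct rpc_call_ops *ds_commit_ops,
                              int how)
    {
            /* data was already prepared by nfs_init_commit() */
            return nfs_initiate_commit(data, ds_clnt, ds_commit_ops, how);
    }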
@@ -1309,7 +1355,9 @@ static int nfs_commit_rpcsetup(struct list_head *head, | |||
1309 | list_splice_init(head, &data->pages); | 1355 | list_splice_init(head, &data->pages); |
1310 | 1356 | ||
1311 | data->inode = inode; | 1357 | data->inode = inode; |
1312 | data->cred = msg.rpc_cred; | 1358 | data->cred = first->wb_context->cred; |
1359 | data->lseg = lseg; /* reference transferred */ | ||
1360 | data->mds_ops = &nfs_commit_ops; | ||
1313 | 1361 | ||
1314 | data->args.fh = NFS_FH(data->inode); | 1362 | data->args.fh = NFS_FH(data->inode); |
1315 | /* Note: we always request a commit of the entire inode */ | 1363 | /* Note: we always request a commit of the entire inode */ |
@@ -1320,20 +1368,25 @@ static int nfs_commit_rpcsetup(struct list_head *head, | |||
1320 | data->res.fattr = &data->fattr; | 1368 | data->res.fattr = &data->fattr; |
1321 | data->res.verf = &data->verf; | 1369 | data->res.verf = &data->verf; |
1322 | nfs_fattr_init(&data->fattr); | 1370 | nfs_fattr_init(&data->fattr); |
1371 | } | ||
1372 | EXPORT_SYMBOL_GPL(nfs_init_commit); | ||
1323 | 1373 | ||
1324 | /* Set up the initial task struct. */ | 1374 | void nfs_retry_commit(struct list_head *page_list, |
1325 | NFS_PROTO(inode)->commit_setup(data, &msg); | 1375 | struct pnfs_layout_segment *lseg) |
1326 | 1376 | { | |
1327 | dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); | 1377 | struct nfs_page *req; |
1328 | 1378 | ||
1329 | task = rpc_run_task(&task_setup_data); | 1379 | while (!list_empty(page_list)) { |
1330 | if (IS_ERR(task)) | 1380 | req = nfs_list_entry(page_list->next); |
1331 | return PTR_ERR(task); | 1381 | nfs_list_remove_request(req); |
1332 | if (how & FLUSH_SYNC) | 1382 | nfs_mark_request_commit(req, lseg); |
1333 | rpc_wait_for_completion_task(task); | 1383 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); |
1334 | rpc_put_task(task); | 1384 | dec_bdi_stat(req->wb_page->mapping->backing_dev_info, |
1335 | return 0; | 1385 | BDI_RECLAIMABLE); |
1386 | nfs_clear_page_tag_locked(req); | ||
1387 | } | ||
1336 | } | 1388 | } |
1389 | EXPORT_SYMBOL_GPL(nfs_retry_commit); | ||
1337 | 1390 | ||
1338 | /* | 1391 | /* |
1339 | * Commit dirty pages | 1392 | * Commit dirty pages |
@@ -1342,7 +1395,6 @@ static int | |||
1342 | nfs_commit_list(struct inode *inode, struct list_head *head, int how) | 1395 | nfs_commit_list(struct inode *inode, struct list_head *head, int how) |
1343 | { | 1396 | { |
1344 | struct nfs_write_data *data; | 1397 | struct nfs_write_data *data; |
1345 | struct nfs_page *req; | ||
1346 | 1398 | ||
1347 | data = nfs_commitdata_alloc(); | 1399 | data = nfs_commitdata_alloc(); |
1348 | 1400 | ||
@@ -1350,17 +1402,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) | |||
1350 | goto out_bad; | 1402 | goto out_bad; |
1351 | 1403 | ||
1352 | /* Set up the argument struct */ | 1404 | /* Set up the argument struct */ |
1353 | return nfs_commit_rpcsetup(head, data, how); | 1405 | nfs_init_commit(data, head, NULL); |
1406 | return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how); | ||
1354 | out_bad: | 1407 | out_bad: |
1355 | while (!list_empty(head)) { | 1408 | nfs_retry_commit(head, NULL); |
1356 | req = nfs_list_entry(head->next); | ||
1357 | nfs_list_remove_request(req); | ||
1358 | nfs_mark_request_commit(req); | ||
1359 | dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | ||
1360 | dec_bdi_stat(req->wb_page->mapping->backing_dev_info, | ||
1361 | BDI_RECLAIMABLE); | ||
1362 | nfs_clear_page_tag_locked(req); | ||
1363 | } | ||
1364 | nfs_commit_clear_lock(NFS_I(inode)); | 1409 | nfs_commit_clear_lock(NFS_I(inode)); |
1365 | return -ENOMEM; | 1410 | return -ENOMEM; |
1366 | } | 1411 | } |
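nfs_commit_list() is now just the MDS instantiation of the exported pieces. Note the ownership rules: nfs_init_commit() takes over the lseg reference (NULL here), and on allocation failure nfs_retry_commit() puts every page back on the commit list before the lock is released. In outline:

    data = nfs_commitdata_alloc();
    if (!data) {
            nfs_retry_commit(head, NULL);   /* re-mark pages, fix counters */
            nfs_commit_clear_lock(NFS_I(inode));
            return -ENOMEM;
    }
    nfs_init_commit(data, head, NULL);      /* no lseg: plain MDS commit */
    return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);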
@@ -1380,10 +1425,9 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) | |||
1380 | return; | 1425 | return; |
1381 | } | 1426 | } |
1382 | 1427 | ||
1383 | static void nfs_commit_release(void *calldata) | 1428 | void nfs_commit_release_pages(struct nfs_write_data *data) |
1384 | { | 1429 | { |
1385 | struct nfs_write_data *data = calldata; | 1430 | struct nfs_page *req; |
1386 | struct nfs_page *req; | ||
1387 | int status = data->task.tk_status; | 1431 | int status = data->task.tk_status; |
1388 | 1432 | ||
1389 | while (!list_empty(&data->pages)) { | 1433 | while (!list_empty(&data->pages)) { |
@@ -1417,6 +1461,14 @@ static void nfs_commit_release(void *calldata) | |||
1417 | next: | 1461 | next: |
1418 | nfs_clear_page_tag_locked(req); | 1462 | nfs_clear_page_tag_locked(req); |
1419 | } | 1463 | } |
1464 | } | ||
1465 | EXPORT_SYMBOL_GPL(nfs_commit_release_pages); | ||
1466 | |||
1467 | static void nfs_commit_release(void *calldata) | ||
1468 | { | ||
1469 | struct nfs_write_data *data = calldata; | ||
1470 | |||
1471 | nfs_commit_release_pages(data); | ||
1420 | nfs_commit_clear_lock(NFS_I(data->inode)); | 1472 | nfs_commit_clear_lock(NFS_I(data->inode)); |
1421 | nfs_commitdata_release(calldata); | 1473 | nfs_commitdata_release(calldata); |
1422 | } | 1474 | } |
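The per-page bookkeeping is now separable from the rpc_release glue: nfs_commit_release_pages() handles the request list and is exported, while nfs_commit_release() stays the MDS ->rpc_release. A layout driver's commit completion could plausibly reuse it like this (a sketch; pnfs_ds_commit_release is a hypothetical name):

    /* Hypothetical ->rpc_release for a data-server COMMIT. */
    static void pnfs_ds_commit_release(void *calldata)
    {
            struct nfs_write_data *data = calldata;

            nfs_commit_release_pages(data);  /* shared per-page processing */
            nfs_commit_clear_lock(NFS_I(data->inode));
            nfs_commitdata_release(data);    /* also puts data->lseg */
    }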
@@ -1433,23 +1485,30 @@ int nfs_commit_inode(struct inode *inode, int how) | |||
1433 | { | 1485 | { |
1434 | LIST_HEAD(head); | 1486 | LIST_HEAD(head); |
1435 | int may_wait = how & FLUSH_SYNC; | 1487 | int may_wait = how & FLUSH_SYNC; |
1436 | int res = 0; | 1488 | int res; |
1437 | 1489 | ||
1438 | if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) | 1490 | res = nfs_commit_set_lock(NFS_I(inode), may_wait); |
1491 | if (res <= 0) | ||
1439 | goto out_mark_dirty; | 1492 | goto out_mark_dirty; |
1440 | spin_lock(&inode->i_lock); | 1493 | spin_lock(&inode->i_lock); |
1441 | res = nfs_scan_commit(inode, &head, 0, 0); | 1494 | res = nfs_scan_commit(inode, &head, 0, 0); |
1442 | spin_unlock(&inode->i_lock); | 1495 | spin_unlock(&inode->i_lock); |
1443 | if (res) { | 1496 | if (res) { |
1444 | int error = nfs_commit_list(inode, &head, how); | 1497 | int error; |
1498 | |||
1499 | error = pnfs_commit_list(inode, &head, how); | ||
1500 | if (error == PNFS_NOT_ATTEMPTED) | ||
1501 | error = nfs_commit_list(inode, &head, how); | ||
1445 | if (error < 0) | 1502 | if (error < 0) |
1446 | return error; | 1503 | return error; |
1447 | if (may_wait) | 1504 | if (!may_wait) |
1448 | wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT, | ||
1449 | nfs_wait_bit_killable, | ||
1450 | TASK_KILLABLE); | ||
1451 | else | ||
1452 | goto out_mark_dirty; | 1505 | goto out_mark_dirty; |
1506 | error = wait_on_bit(&NFS_I(inode)->flags, | ||
1507 | NFS_INO_COMMIT, | ||
1508 | nfs_wait_bit_killable, | ||
1509 | TASK_KILLABLE); | ||
1510 | if (error < 0) | ||
1511 | return error; | ||
1453 | } else | 1512 | } else |
1454 | nfs_commit_clear_lock(NFS_I(inode)); | 1513 | nfs_commit_clear_lock(NFS_I(inode)); |
1455 | return res; | 1514 | return res; |
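nfs_commit_inode() now gives pNFS first refusal over the scanned list; PNFS_NOT_ATTEMPTED is the explicit "not mine" answer that falls back to the MDS path (the !CONFIG_NFS_V4_1 stub in pnfs.h returns it unconditionally). Reduced to its shape:

    error = pnfs_commit_list(inode, &head, how);
    if (error == PNFS_NOT_ATTEMPTED)        /* stub, or no layout to use */
            error = nfs_commit_list(inode, &head, how);

The hunk also stops swallowing failures: a negative return from nfs_commit_set_lock() or from the killable wait_on_bit() is now propagated to the caller.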
@@ -1503,7 +1562,22 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr | |||
1503 | 1562 | ||
1504 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) | 1563 | int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) |
1505 | { | 1564 | { |
1506 | return nfs_commit_unstable_pages(inode, wbc); | 1565 | int ret; |
1566 | |||
1567 | ret = nfs_commit_unstable_pages(inode, wbc); | ||
1568 | if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) { | ||
1569 | int status; | ||
1570 | bool sync = true; | ||
1571 | |||
1572 | if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking || | ||
1573 | wbc->for_background) | ||
1574 | sync = false; | ||
1575 | |||
1576 | status = pnfs_layoutcommit_inode(inode, sync); | ||
1577 | if (status < 0) | ||
1578 | return status; | ||
1579 | } | ||
1580 | return ret; | ||
1507 | } | 1581 | } |
1508 | 1582 | ||
1509 | /* | 1583 | /* |
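nfs_write_inode() now follows a successful commit pass with LAYOUTCOMMIT whenever the inode has pNFS state to report, blocking only for data-integrity writeback. The sync decision, condensed (a sketch of the hunk above):

    /* Only WB_SYNC_ALL-style writeback waits for LAYOUTCOMMIT. */
    bool sync = !(wbc->sync_mode == WB_SYNC_NONE ||
                  wbc->nonblocking || wbc->for_background);
    status = pnfs_layoutcommit_inode(inode, sync);
    if (status < 0)
            return status;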
diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 84c27d69d421..ec0f277be7f5 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c | |||
@@ -117,7 +117,6 @@ int nfsacl_encode(struct xdr_buf *buf, unsigned int base, struct inode *inode, | |||
117 | * invoked in contexts where a memory allocation failure is | 117 | * invoked in contexts where a memory allocation failure is |
118 | * fatal. Fortunately this fake ACL is small enough to | 118 | * fatal. Fortunately this fake ACL is small enough to |
119 | * construct on the stack. */ | 119 | * construct on the stack. */ |
120 | memset(acl2, 0, sizeof(acl2)); | ||
121 | posix_acl_init(acl2, 4); | 120 | posix_acl_init(acl2, 4); |
122 | 121 | ||
123 | /* Insert entries in canonical order: other orders seem | 122 | /* Insert entries in canonical order: other orders seem |
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 4c29fcf557d1..07ea8d3e6ea2 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c | |||
@@ -22,13 +22,14 @@ | |||
22 | #include <linux/module.h> | 22 | #include <linux/module.h> |
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/spinlock.h> | 24 | #include <linux/spinlock.h> |
25 | #include <linux/writeback.h> /* for inode_lock */ | ||
26 | 25 | ||
27 | #include <asm/atomic.h> | 26 | #include <asm/atomic.h> |
28 | 27 | ||
29 | #include <linux/fsnotify_backend.h> | 28 | #include <linux/fsnotify_backend.h> |
30 | #include "fsnotify.h" | 29 | #include "fsnotify.h" |
31 | 30 | ||
31 | #include "../internal.h" | ||
32 | |||
32 | /* | 33 | /* |
33 | * Recalculate the mask of events relevant to a given inode locked. | 34 | * Recalculate the mask of events relevant to a given inode locked. |
34 | */ | 35 | */ |
@@ -237,15 +238,14 @@ out: | |||
237 | * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. | 238 | * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. |
238 | * @list: list of inodes being unmounted (sb->s_inodes) | 239 | * @list: list of inodes being unmounted (sb->s_inodes) |
239 | * | 240 | * |
240 | * Called with inode_lock held, protecting the unmounting super block's list | 241 | * Called during unmount with no locks held, so needs to be safe against |
241 | * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay. | 242 | * concurrent modifiers. We temporarily drop inode_sb_list_lock and CAN block. |
242 | * We temporarily drop inode_lock, however, and CAN block. | ||
243 | */ | 243 | */ |
244 | void fsnotify_unmount_inodes(struct list_head *list) | 244 | void fsnotify_unmount_inodes(struct list_head *list) |
245 | { | 245 | { |
246 | struct inode *inode, *next_i, *need_iput = NULL; | 246 | struct inode *inode, *next_i, *need_iput = NULL; |
247 | 247 | ||
248 | spin_lock(&inode_lock); | 248 | spin_lock(&inode_sb_list_lock); |
249 | list_for_each_entry_safe(inode, next_i, list, i_sb_list) { | 249 | list_for_each_entry_safe(inode, next_i, list, i_sb_list) { |
250 | struct inode *need_iput_tmp; | 250 | struct inode *need_iput_tmp; |
251 | 251 | ||
@@ -254,8 +254,11 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
254 | * I_WILL_FREE, or I_NEW which is fine because by that point | 254 | * I_WILL_FREE, or I_NEW which is fine because by that point |
255 | * the inode cannot have any associated watches. | 255 | * the inode cannot have any associated watches. |
256 | */ | 256 | */ |
257 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) | 257 | spin_lock(&inode->i_lock); |
258 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { | ||
259 | spin_unlock(&inode->i_lock); | ||
258 | continue; | 260 | continue; |
261 | } | ||
259 | 262 | ||
260 | /* | 263 | /* |
261 | * If i_count is zero, the inode cannot have any watches and | 264 | * If i_count is zero, the inode cannot have any watches and |
@@ -263,8 +266,10 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
263 | * evict all inodes with zero i_count from icache which is | 266 | * evict all inodes with zero i_count from icache which is |
264 | * unnecessarily violent and may in fact be illegal to do. | 267 | * unnecessarily violent and may in fact be illegal to do. |
265 | */ | 268 | */ |
266 | if (!atomic_read(&inode->i_count)) | 269 | if (!atomic_read(&inode->i_count)) { |
270 | spin_unlock(&inode->i_lock); | ||
267 | continue; | 271 | continue; |
272 | } | ||
268 | 273 | ||
269 | need_iput_tmp = need_iput; | 274 | need_iput_tmp = need_iput; |
270 | need_iput = NULL; | 275 | need_iput = NULL; |
@@ -274,22 +279,25 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
274 | __iget(inode); | 279 | __iget(inode); |
275 | else | 280 | else |
276 | need_iput_tmp = NULL; | 281 | need_iput_tmp = NULL; |
282 | spin_unlock(&inode->i_lock); | ||
277 | 283 | ||
278 | /* In case the dropping of a reference would nuke next_i. */ | 284 | /* In case the dropping of a reference would nuke next_i. */ |
279 | if ((&next_i->i_sb_list != list) && | 285 | if ((&next_i->i_sb_list != list) && |
280 | atomic_read(&next_i->i_count) && | 286 | atomic_read(&next_i->i_count)) { |
281 | !(next_i->i_state & (I_FREEING | I_WILL_FREE))) { | 287 | spin_lock(&next_i->i_lock); |
282 | __iget(next_i); | 288 | if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) { |
283 | need_iput = next_i; | 289 | __iget(next_i); |
290 | need_iput = next_i; | ||
291 | } | ||
292 | spin_unlock(&next_i->i_lock); | ||
284 | } | 293 | } |
285 | 294 | ||
286 | /* | 295 | /* |
287 | * We can safely drop inode_lock here because we hold | 296 | * We can safely drop inode_sb_list_lock here because we hold |
288 | * references on both inode and next_i. Also no new inodes | 297 | * references on both inode and next_i. Also no new inodes |
289 | * will be added since the umount has begun. Finally, | 298 | * will be added since the umount has begun. |
290 | * iprune_mutex keeps shrink_icache_memory() away. | ||
291 | */ | 299 | */ |
292 | spin_unlock(&inode_lock); | 300 | spin_unlock(&inode_sb_list_lock); |
293 | 301 | ||
294 | if (need_iput_tmp) | 302 | if (need_iput_tmp) |
295 | iput(need_iput_tmp); | 303 | iput(need_iput_tmp); |
@@ -301,7 +309,7 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
301 | 309 | ||
302 | iput(inode); | 310 | iput(inode); |
303 | 311 | ||
304 | spin_lock(&inode_lock); | 312 | spin_lock(&inode_sb_list_lock); |
305 | } | 313 | } |
306 | spin_unlock(&inode_lock); | 314 | spin_unlock(&inode_sb_list_lock); |
307 | } | 315 | } |
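With the global inode_lock gone, the unmount walk follows the new two-level discipline: inode_sb_list_lock guards the s_inodes list, each inode's state and refcount are checked under its own i_lock, and the inode is pinned with __iget() before either lock is dropped. The subtle part is next_i, which is pinned under its own i_lock so the list cursor survives while the walk blocks. The skeleton (a sketch; skip_this() stands in for the I_FREEING/i_count checks):

    spin_lock(&inode_sb_list_lock);
    list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
            spin_lock(&inode->i_lock);
            if (skip_this(inode)) {
                    spin_unlock(&inode->i_lock);
                    continue;
            }
            __iget(inode);                    /* pin before unlocking */
            spin_unlock(&inode->i_lock);

            spin_unlock(&inode_sb_list_lock); /* now safe to block */
            /* ... fsnotify work, deferred iput()s ... */
            spin_lock(&inode_sb_list_lock);
    }
    spin_unlock(&inode_sb_list_lock);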
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 325185e514bb..50c00856f730 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c | |||
@@ -91,7 +91,6 @@ | |||
91 | #include <linux/slab.h> | 91 | #include <linux/slab.h> |
92 | #include <linux/spinlock.h> | 92 | #include <linux/spinlock.h> |
93 | #include <linux/srcu.h> | 93 | #include <linux/srcu.h> |
94 | #include <linux/writeback.h> /* for inode_lock */ | ||
95 | 94 | ||
96 | #include <asm/atomic.h> | 95 | #include <asm/atomic.h> |
97 | 96 | ||
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index 85eebff6d0d7..e86577d6c5c3 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/mount.h> | 23 | #include <linux/mount.h> |
24 | #include <linux/mutex.h> | 24 | #include <linux/mutex.h> |
25 | #include <linux/spinlock.h> | 25 | #include <linux/spinlock.h> |
26 | #include <linux/writeback.h> /* for inode_lock */ | ||
27 | 26 | ||
28 | #include <asm/atomic.h> | 27 | #include <asm/atomic.h> |
29 | 28 | ||
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index a627ed82c0a3..0b56c6b7ec01 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c | |||
@@ -54,7 +54,7 @@ | |||
54 | * | 54 | * |
55 | * Return 1 if the attributes match and 0 if not. | 55 | * Return 1 if the attributes match and 0 if not. |
56 | * | 56 | * |
57 | * NOTE: This function runs with the inode_lock spin lock held so it is not | 57 | * NOTE: This function runs with the inode->i_lock spin lock held so it is not |
58 | * allowed to sleep. | 58 | * allowed to sleep. |
59 | */ | 59 | */ |
60 | int ntfs_test_inode(struct inode *vi, ntfs_attr *na) | 60 | int ntfs_test_inode(struct inode *vi, ntfs_attr *na) |
@@ -98,7 +98,7 @@ int ntfs_test_inode(struct inode *vi, ntfs_attr *na) | |||
98 | * | 98 | * |
99 | * Return 0 on success and -errno on error. | 99 | * Return 0 on success and -errno on error. |
100 | * | 100 | * |
101 | * NOTE: This function runs with the inode_lock spin lock held so it is not | 101 | * NOTE: This function runs with the inode->i_lock spin lock held so it is not |
102 | * allowed to sleep. (Hence the GFP_ATOMIC allocation.) | 102 | * allowed to sleep. (Hence the GFP_ATOMIC allocation.) |
103 | */ | 103 | */ |
104 | static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) | 104 | static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7c708a418acc..2e7addfd9803 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -182,7 +182,8 @@ static void m_stop(struct seq_file *m, void *v) | |||
182 | struct proc_maps_private *priv = m->private; | 182 | struct proc_maps_private *priv = m->private; |
183 | struct vm_area_struct *vma = v; | 183 | struct vm_area_struct *vma = v; |
184 | 184 | ||
185 | vma_stop(priv, vma); | 185 | if (!IS_ERR(vma)) |
186 | vma_stop(priv, vma); | ||
186 | if (priv->task) | 187 | if (priv->task) |
187 | put_task_struct(priv->task); | 188 | put_task_struct(priv->task); |
188 | } | 189 | } |
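m_start()/m_next() in task_mmu.c can return ERR_PTR values, and seq_file hands whatever they returned straight to ->stop(); the guard keeps vma_stop() from dereferencing an error pointer. The general rule for any seq_file implementation (a sketch; release_element() is a stand-in for vma_stop()):

    static void example_stop(struct seq_file *m, void *v)
    {
            /* v may be NULL or an ERR_PTR from ->start()/->next();
             * only a real element needs cleanup */
            if (!IS_ERR_OR_NULL(v))
                    release_element(v);
    }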
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index a2a622e079f0..fcc8ae75d874 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -76,7 +76,7 @@ | |||
76 | #include <linux/buffer_head.h> | 76 | #include <linux/buffer_head.h> |
77 | #include <linux/capability.h> | 77 | #include <linux/capability.h> |
78 | #include <linux/quotaops.h> | 78 | #include <linux/quotaops.h> |
79 | #include <linux/writeback.h> /* for inode_lock, oddly enough.. */ | 79 | #include "../internal.h" /* ugh */ |
80 | 80 | ||
81 | #include <asm/uaccess.h> | 81 | #include <asm/uaccess.h> |
82 | 82 | ||
@@ -900,33 +900,38 @@ static void add_dquot_ref(struct super_block *sb, int type) | |||
900 | int reserved = 0; | 900 | int reserved = 0; |
901 | #endif | 901 | #endif |
902 | 902 | ||
903 | spin_lock(&inode_lock); | 903 | spin_lock(&inode_sb_list_lock); |
904 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 904 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
905 | if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) | 905 | spin_lock(&inode->i_lock); |
906 | if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || | ||
907 | !atomic_read(&inode->i_writecount) || | ||
908 | !dqinit_needed(inode, type)) { | ||
909 | spin_unlock(&inode->i_lock); | ||
906 | continue; | 910 | continue; |
911 | } | ||
907 | #ifdef CONFIG_QUOTA_DEBUG | 912 | #ifdef CONFIG_QUOTA_DEBUG |
908 | if (unlikely(inode_get_rsv_space(inode) > 0)) | 913 | if (unlikely(inode_get_rsv_space(inode) > 0)) |
909 | reserved = 1; | 914 | reserved = 1; |
910 | #endif | 915 | #endif |
911 | if (!atomic_read(&inode->i_writecount)) | ||
912 | continue; | ||
913 | if (!dqinit_needed(inode, type)) | ||
914 | continue; | ||
915 | |||
916 | __iget(inode); | 916 | __iget(inode); |
917 | spin_unlock(&inode_lock); | 917 | spin_unlock(&inode->i_lock); |
918 | spin_unlock(&inode_sb_list_lock); | ||
918 | 919 | ||
919 | iput(old_inode); | 920 | iput(old_inode); |
920 | __dquot_initialize(inode, type); | 921 | __dquot_initialize(inode, type); |
921 | /* We hold a reference to 'inode' so it couldn't have been | 922 | |
922 | * removed from s_inodes list while we dropped the inode_lock. | 923 | /* |
923 | * We cannot iput the inode now as we can be holding the last | 924 | * We hold a reference to 'inode' so it couldn't have been |
924 | * reference and we cannot iput it under inode_lock. So we | 925 | * removed from s_inodes list while we dropped the |
925 | * keep the reference and iput it later. */ | 926 | * inode_sb_list_lock. We cannot iput the inode now as we can be
927 | * holding the last reference and we cannot iput it under | ||
928 | * inode_sb_list_lock. So we keep the reference and iput it | ||
929 | * later. | ||
930 | */ | ||
926 | old_inode = inode; | 931 | old_inode = inode; |
927 | spin_lock(&inode_lock); | 932 | spin_lock(&inode_sb_list_lock); |
928 | } | 933 | } |
929 | spin_unlock(&inode_lock); | 934 | spin_unlock(&inode_sb_list_lock); |
930 | iput(old_inode); | 935 | iput(old_inode); |
931 | 936 | ||
932 | #ifdef CONFIG_QUOTA_DEBUG | 937 | #ifdef CONFIG_QUOTA_DEBUG |
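add_dquot_ref() shows the companion idiom for doing real, sleepable work per inode: pin the inode, drop both locks, and defer the iput() of the previous inode until the locks are down, because a final iput() can sleep and must never run under inode_sb_list_lock. The skeleton (a sketch; skip_this() stands in for the I_NEW/i_writecount/dqinit_needed() filters):

    struct inode *inode, *old_inode = NULL;

    spin_lock(&inode_sb_list_lock);
    list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
            spin_lock(&inode->i_lock);
            if (skip_this(inode)) {
                    spin_unlock(&inode->i_lock);
                    continue;
            }
            __iget(inode);
            spin_unlock(&inode->i_lock);
            spin_unlock(&inode_sb_list_lock);

            iput(old_inode);                /* previous pin; locks dropped */
            __dquot_initialize(inode, type);
            old_inode = inode;              /* iput on the next iteration */

            spin_lock(&inode_sb_list_lock);
    }
    spin_unlock(&inode_sb_list_lock);
    iput(old_inode);                        /* drop the final reference */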
@@ -1007,7 +1012,7 @@ static void remove_dquot_ref(struct super_block *sb, int type, | |||
1007 | struct inode *inode; | 1012 | struct inode *inode; |
1008 | int reserved = 0; | 1013 | int reserved = 0; |
1009 | 1014 | ||
1010 | spin_lock(&inode_lock); | 1015 | spin_lock(&inode_sb_list_lock); |
1011 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { | 1016 | list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { |
1012 | /* | 1017 | /* |
1013 | * We have to scan also I_NEW inodes because they can already | 1018 | * We have to scan also I_NEW inodes because they can already |
@@ -1021,7 +1026,7 @@ static void remove_dquot_ref(struct super_block *sb, int type, | |||
1021 | remove_inode_dquot_ref(inode, type, tofree_head); | 1026 | remove_inode_dquot_ref(inode, type, tofree_head); |
1022 | } | 1027 | } |
1023 | } | 1028 | } |
1024 | spin_unlock(&inode_lock); | 1029 | spin_unlock(&inode_sb_list_lock); |
1025 | #ifdef CONFIG_QUOTA_DEBUG | 1030 | #ifdef CONFIG_QUOTA_DEBUG |
1026 | if (reserved) { | 1031 | if (reserved) { |
1027 | printk(KERN_WARNING "VFS (%s): Writes happened after quota" | 1032 | printk(KERN_WARNING "VFS (%s): Writes happened after quota" |