aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/autofs4/root.c5
-rw-r--r--fs/btrfs/ioctl.c5
-rw-r--r--fs/cachefiles/internal.h1
-rw-r--r--fs/cachefiles/namei.c98
-rw-r--r--fs/cachefiles/security.c4
-rw-r--r--fs/ceph/addr.c8
-rw-r--r--fs/ceph/auth.c1
-rw-r--r--fs/ceph/auth_none.h2
-rw-r--r--fs/ceph/auth_x.c32
-rw-r--r--fs/ceph/caps.c21
-rw-r--r--fs/ceph/dir.c9
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/ceph/inode.c8
-rw-r--r--fs/ceph/mds_client.c34
-rw-r--r--fs/ceph/messenger.c39
-rw-r--r--fs/ceph/messenger.h1
-rw-r--r--fs/ceph/osd_client.c26
-rw-r--r--fs/ceph/osd_client.h3
-rw-r--r--fs/ceph/osdmap.c29
-rw-r--r--fs/ceph/osdmap.h2
-rw-r--r--fs/ceph/rados.h1
-rw-r--r--fs/ceph/snap.c24
-rw-r--r--fs/ceph/super.c30
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/cifs/cifsglob.h1
-rw-r--r--fs/cifs/inode.c21
-rw-r--r--fs/compat.c2
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/configfs/dir.c4
-rw-r--r--fs/exec.c2
-rw-r--r--fs/exofs/exofs.h2
-rw-r--r--fs/jfs/super.c13
-rw-r--r--fs/logfs/super.c14
-rw-r--r--fs/namei.c27
-rw-r--r--fs/namespace.c6
-rw-r--r--fs/nfs/client.c2
-rw-r--r--fs/nfs/delegation.c86
-rw-r--r--fs/nfs/dir.c2
-rw-r--r--fs/nfs/nfs4proc.c5
-rw-r--r--fs/nfs/super.c3
-rw-r--r--fs/nfs/write.c55
-rw-r--r--fs/nilfs2/super.c1
-rw-r--r--fs/notify/inotify/Kconfig1
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c16
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/dlm/dlmast.c5
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c14
-rw-r--r--fs/ocfs2/file.c32
-rw-r--r--fs/ocfs2/inode.c68
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/ocfs2/namei.c58
-rw-r--r--fs/ocfs2/refcounttree.c3
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/proc/task_mmu.c19
-rw-r--r--fs/super.c1
-rw-r--r--fs/sysv/dir.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c112
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h7
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c3
-rw-r--r--fs/xfs/xfs_ag.h1
-rw-r--r--fs/xfs/xfs_mount.h1
63 files changed, 684 insertions, 309 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 109a6c606d92..e8e5e63ac950 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -177,8 +177,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
177 } 177 }
178 /* Trigger mount for path component or follow link */ 178 /* Trigger mount for path component or follow link */
179 } else if (ino->flags & AUTOFS_INF_PENDING || 179 } else if (ino->flags & AUTOFS_INF_PENDING ||
180 autofs4_need_mount(flags) || 180 autofs4_need_mount(flags)) {
181 current->link_count) {
182 DPRINTK("waiting for mount name=%.*s", 181 DPRINTK("waiting for mount name=%.*s",
183 dentry->d_name.len, dentry->d_name.name); 182 dentry->d_name.len, dentry->d_name.name);
184 183
@@ -262,7 +261,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
262 spin_unlock(&dcache_lock); 261 spin_unlock(&dcache_lock);
263 spin_unlock(&sbi->fs_lock); 262 spin_unlock(&sbi->fs_lock);
264 263
265 status = try_to_fill_dentry(dentry, 0); 264 status = try_to_fill_dentry(dentry, nd->flags);
266 if (status) 265 if (status)
267 goto out_error; 266 goto out_error;
268 267
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e84ef60ffe35..97a97839a867 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1481,12 +1481,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1481 ret = -EBADF; 1481 ret = -EBADF;
1482 goto out_drop_write; 1482 goto out_drop_write;
1483 } 1483 }
1484
1484 src = src_file->f_dentry->d_inode; 1485 src = src_file->f_dentry->d_inode;
1485 1486
1486 ret = -EINVAL; 1487 ret = -EINVAL;
1487 if (src == inode) 1488 if (src == inode)
1488 goto out_fput; 1489 goto out_fput;
1489 1490
1491 /* the src must be open for reading */
1492 if (!(src_file->f_mode & FMODE_READ))
1493 goto out_fput;
1494
1490 ret = -EISDIR; 1495 ret = -EISDIR;
1491 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 1496 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
1492 goto out_fput; 1497 goto out_fput;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index f7c255f9c624..a8cd821226da 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -34,6 +34,7 @@ struct cachefiles_object {
34 loff_t i_size; /* object size */ 34 loff_t i_size; /* object size */
35 unsigned long flags; 35 unsigned long flags;
36#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */ 36#define CACHEFILES_OBJECT_ACTIVE 0 /* T if marked active */
37#define CACHEFILES_OBJECT_BURIED 1 /* T if preemptively buried */
37 atomic_t usage; /* object usage count */ 38 atomic_t usage; /* object usage count */
38 uint8_t type; /* object type */ 39 uint8_t type; /* object type */
39 uint8_t new; /* T if object new */ 40 uint8_t new; /* T if object new */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index d5db84a1ee0d..f4a7840bf42c 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -93,6 +93,59 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object,
93} 93}
94 94
95/* 95/*
96 * mark the owner of a dentry, if there is one, to indicate that that dentry
97 * has been preemptively deleted
98 * - the caller must hold the i_mutex on the dentry's parent as required to
99 * call vfs_unlink(), vfs_rmdir() or vfs_rename()
100 */
101static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
102 struct dentry *dentry)
103{
104 struct cachefiles_object *object;
105 struct rb_node *p;
106
107 _enter(",'%*.*s'",
108 dentry->d_name.len, dentry->d_name.len, dentry->d_name.name);
109
110 write_lock(&cache->active_lock);
111
112 p = cache->active_nodes.rb_node;
113 while (p) {
114 object = rb_entry(p, struct cachefiles_object, active_node);
115 if (object->dentry > dentry)
116 p = p->rb_left;
117 else if (object->dentry < dentry)
118 p = p->rb_right;
119 else
120 goto found_dentry;
121 }
122
123 write_unlock(&cache->active_lock);
124 _leave(" [no owner]");
125 return;
126
127 /* found the dentry for */
128found_dentry:
129 kdebug("preemptive burial: OBJ%x [%s] %p",
130 object->fscache.debug_id,
131 fscache_object_states[object->fscache.state],
132 dentry);
133
134 if (object->fscache.state < FSCACHE_OBJECT_DYING) {
135 printk(KERN_ERR "\n");
136 printk(KERN_ERR "CacheFiles: Error:"
137 " Can't preemptively bury live object\n");
138 cachefiles_printk_object(object, NULL);
139 } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
140 printk(KERN_ERR "CacheFiles: Error:"
141 " Object already preemptively buried\n");
142 }
143
144 write_unlock(&cache->active_lock);
145 _leave(" [owner marked]");
146}
147
148/*
96 * record the fact that an object is now active 149 * record the fact that an object is now active
97 */ 150 */
98static int cachefiles_mark_object_active(struct cachefiles_cache *cache, 151static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
@@ -219,7 +272,8 @@ requeue:
219 */ 272 */
220static int cachefiles_bury_object(struct cachefiles_cache *cache, 273static int cachefiles_bury_object(struct cachefiles_cache *cache,
221 struct dentry *dir, 274 struct dentry *dir,
222 struct dentry *rep) 275 struct dentry *rep,
276 bool preemptive)
223{ 277{
224 struct dentry *grave, *trap; 278 struct dentry *grave, *trap;
225 char nbuffer[8 + 8 + 1]; 279 char nbuffer[8 + 8 + 1];
@@ -229,11 +283,16 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
229 dir->d_name.len, dir->d_name.len, dir->d_name.name, 283 dir->d_name.len, dir->d_name.len, dir->d_name.name,
230 rep->d_name.len, rep->d_name.len, rep->d_name.name); 284 rep->d_name.len, rep->d_name.len, rep->d_name.name);
231 285
286 _debug("remove %p from %p", rep, dir);
287
232 /* non-directories can just be unlinked */ 288 /* non-directories can just be unlinked */
233 if (!S_ISDIR(rep->d_inode->i_mode)) { 289 if (!S_ISDIR(rep->d_inode->i_mode)) {
234 _debug("unlink stale object"); 290 _debug("unlink stale object");
235 ret = vfs_unlink(dir->d_inode, rep); 291 ret = vfs_unlink(dir->d_inode, rep);
236 292
293 if (preemptive)
294 cachefiles_mark_object_buried(cache, rep);
295
237 mutex_unlock(&dir->d_inode->i_mutex); 296 mutex_unlock(&dir->d_inode->i_mutex);
238 297
239 if (ret == -EIO) 298 if (ret == -EIO)
@@ -325,6 +384,9 @@ try_again:
325 if (ret != 0 && ret != -ENOMEM) 384 if (ret != 0 && ret != -ENOMEM)
326 cachefiles_io_error(cache, "Rename failed with error %d", ret); 385 cachefiles_io_error(cache, "Rename failed with error %d", ret);
327 386
387 if (preemptive)
388 cachefiles_mark_object_buried(cache, rep);
389
328 unlock_rename(cache->graveyard, dir); 390 unlock_rename(cache->graveyard, dir);
329 dput(grave); 391 dput(grave);
330 _leave(" = 0"); 392 _leave(" = 0");
@@ -340,7 +402,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
340 struct dentry *dir; 402 struct dentry *dir;
341 int ret; 403 int ret;
342 404
343 _enter(",{%p}", object->dentry); 405 _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
344 406
345 ASSERT(object->dentry); 407 ASSERT(object->dentry);
346 ASSERT(object->dentry->d_inode); 408 ASSERT(object->dentry->d_inode);
@@ -350,15 +412,25 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
350 412
351 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); 413 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
352 414
353 /* we need to check that our parent is _still_ our parent - it may have 415 if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
354 * been renamed */ 416 /* object allocation for the same key preemptively deleted this
355 if (dir == object->dentry->d_parent) { 417 * object's file so that it could create its own file */
356 ret = cachefiles_bury_object(cache, dir, object->dentry); 418 _debug("object preemptively buried");
357 } else {
358 /* it got moved, presumably by cachefilesd culling it, so it's
359 * no longer in the key path and we can ignore it */
360 mutex_unlock(&dir->d_inode->i_mutex); 419 mutex_unlock(&dir->d_inode->i_mutex);
361 ret = 0; 420 ret = 0;
421 } else {
422 /* we need to check that our parent is _still_ our parent - it
423 * may have been renamed */
424 if (dir == object->dentry->d_parent) {
425 ret = cachefiles_bury_object(cache, dir,
426 object->dentry, false);
427 } else {
428 /* it got moved, presumably by cachefilesd culling it,
429 * so it's no longer in the key path and we can ignore
430 * it */
431 mutex_unlock(&dir->d_inode->i_mutex);
432 ret = 0;
433 }
362 } 434 }
363 435
364 dput(dir); 436 dput(dir);
@@ -381,7 +453,9 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
381 const char *name; 453 const char *name;
382 int ret, nlen; 454 int ret, nlen;
383 455
384 _enter("{%p},,%s,", parent->dentry, key); 456 _enter("OBJ%x{%p},OBJ%x,%s,",
457 parent->fscache.debug_id, parent->dentry,
458 object->fscache.debug_id, key);
385 459
386 cache = container_of(parent->fscache.cache, 460 cache = container_of(parent->fscache.cache,
387 struct cachefiles_cache, cache); 461 struct cachefiles_cache, cache);
@@ -509,7 +583,7 @@ lookup_again:
509 * mutex) */ 583 * mutex) */
510 object->dentry = NULL; 584 object->dentry = NULL;
511 585
512 ret = cachefiles_bury_object(cache, dir, next); 586 ret = cachefiles_bury_object(cache, dir, next, true);
513 dput(next); 587 dput(next);
514 next = NULL; 588 next = NULL;
515 589
@@ -828,7 +902,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
828 /* actually remove the victim (drops the dir mutex) */ 902 /* actually remove the victim (drops the dir mutex) */
829 _debug("bury"); 903 _debug("bury");
830 904
831 ret = cachefiles_bury_object(cache, dir, victim); 905 ret = cachefiles_bury_object(cache, dir, victim, false);
832 if (ret < 0) 906 if (ret < 0)
833 goto error; 907 goto error;
834 908
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index b5808cdb2232..039b5011d83b 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
77/* 77/*
78 * check the security details of the on-disk cache 78 * check the security details of the on-disk cache
79 * - must be called with security override in force 79 * - must be called with security override in force
80 * - must return with a security override in force - even in the case of an
81 * error
80 */ 82 */
81int cachefiles_determine_cache_security(struct cachefiles_cache *cache, 83int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
82 struct dentry *root, 84 struct dentry *root,
@@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
99 * which create files */ 101 * which create files */
100 ret = set_create_files_as(new, root->d_inode); 102 ret = set_create_files_as(new, root->d_inode);
101 if (ret < 0) { 103 if (ret < 0) {
104 abort_creds(new);
105 cachefiles_begin_secure(cache, _saved_cred);
102 _leave(" = %d [cfa]", ret); 106 _leave(" = %d [cfa]", ret);
103 return ret; 107 return ret;
104 } 108 }
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 412593703d1e..a9005d862ed4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -504,12 +504,11 @@ static void writepages_finish(struct ceph_osd_request *req,
504 int i; 504 int i;
505 struct ceph_snap_context *snapc = req->r_snapc; 505 struct ceph_snap_context *snapc = req->r_snapc;
506 struct address_space *mapping = inode->i_mapping; 506 struct address_space *mapping = inode->i_mapping;
507 struct writeback_control *wbc = req->r_wbc;
508 __s32 rc = -EIO; 507 __s32 rc = -EIO;
509 u64 bytes = 0; 508 u64 bytes = 0;
510 struct ceph_client *client = ceph_inode_to_client(inode); 509 struct ceph_client *client = ceph_inode_to_client(inode);
511 long writeback_stat; 510 long writeback_stat;
512 unsigned issued = __ceph_caps_issued(ci, NULL); 511 unsigned issued = ceph_caps_issued(ci);
513 512
514 /* parse reply */ 513 /* parse reply */
515 replyhead = msg->front.iov_base; 514 replyhead = msg->front.iov_base;
@@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req,
546 clear_bdi_congested(&client->backing_dev_info, 545 clear_bdi_congested(&client->backing_dev_info,
547 BLK_RW_ASYNC); 546 BLK_RW_ASYNC);
548 547
549 if (i >= wrote) {
550 dout("inode %p skipping page %p\n", inode, page);
551 wbc->pages_skipped++;
552 }
553 ceph_put_snap_context((void *)page->private); 548 ceph_put_snap_context((void *)page->private);
554 page->private = 0; 549 page->private = 0;
555 ClearPagePrivate(page); 550 ClearPagePrivate(page);
@@ -799,7 +794,6 @@ get_more_pages:
799 alloc_page_vec(client, req); 794 alloc_page_vec(client, req);
800 req->r_callback = writepages_finish; 795 req->r_callback = writepages_finish;
801 req->r_inode = inode; 796 req->r_inode = inode;
802 req->r_wbc = wbc;
803 } 797 }
804 798
805 /* note position of first page in pvec */ 799 /* note position of first page in pvec */
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index f6394b94b866..818afe72e6c7 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -3,6 +3,7 @@
3#include <linux/module.h> 3#include <linux/module.h>
4#include <linux/slab.h> 4#include <linux/slab.h>
5#include <linux/err.h> 5#include <linux/err.h>
6#include <linux/slab.h>
6 7
7#include "types.h" 8#include "types.h"
8#include "auth_none.h" 9#include "auth_none.h"
diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h
index 56c05533a31c..8164df1a08be 100644
--- a/fs/ceph/auth_none.h
+++ b/fs/ceph/auth_none.h
@@ -1,6 +1,8 @@
1#ifndef _FS_CEPH_AUTH_NONE_H 1#ifndef _FS_CEPH_AUTH_NONE_H
2#define _FS_CEPH_AUTH_NONE_H 2#define _FS_CEPH_AUTH_NONE_H
3 3
4#include <linux/slab.h>
5
4#include "auth.h" 6#include "auth.h"
5 7
6/* 8/*
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index d9001a4dc8cc..fee5a08da881 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -12,8 +12,6 @@
12#include "auth.h" 12#include "auth.h"
13#include "decode.h" 13#include "decode.h"
14 14
15struct kmem_cache *ceph_x_ticketbuf_cachep;
16
17#define TEMP_TICKET_BUF_LEN 256 15#define TEMP_TICKET_BUF_LEN 256
18 16
19static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); 17static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
@@ -131,13 +129,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
131 char *ticket_buf; 129 char *ticket_buf;
132 u8 struct_v; 130 u8 struct_v;
133 131
134 dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC); 132 dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
135 if (!dbuf) 133 if (!dbuf)
136 return -ENOMEM; 134 return -ENOMEM;
137 135
138 ret = -ENOMEM; 136 ret = -ENOMEM;
139 ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, 137 ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
140 GFP_NOFS | GFP_ATOMIC);
141 if (!ticket_buf) 138 if (!ticket_buf)
142 goto out_dbuf; 139 goto out_dbuf;
143 140
@@ -251,9 +248,9 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
251 248
252 ret = 0; 249 ret = 0;
253out: 250out:
254 kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf); 251 kfree(ticket_buf);
255out_dbuf: 252out_dbuf:
256 kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf); 253 kfree(dbuf);
257 return ret; 254 return ret;
258 255
259bad: 256bad:
@@ -605,8 +602,6 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
605 remove_ticket_handler(ac, th); 602 remove_ticket_handler(ac, th);
606 } 603 }
607 604
608 kmem_cache_destroy(ceph_x_ticketbuf_cachep);
609
610 kfree(ac->private); 605 kfree(ac->private);
611 ac->private = NULL; 606 ac->private = NULL;
612} 607}
@@ -641,26 +636,20 @@ int ceph_x_init(struct ceph_auth_client *ac)
641 int ret; 636 int ret;
642 637
643 dout("ceph_x_init %p\n", ac); 638 dout("ceph_x_init %p\n", ac);
639 ret = -ENOMEM;
644 xi = kzalloc(sizeof(*xi), GFP_NOFS); 640 xi = kzalloc(sizeof(*xi), GFP_NOFS);
645 if (!xi) 641 if (!xi)
646 return -ENOMEM; 642 goto out;
647 643
648 ret = -ENOMEM;
649 ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf",
650 TEMP_TICKET_BUF_LEN, 8,
651 (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
652 NULL);
653 if (!ceph_x_ticketbuf_cachep)
654 goto done_nomem;
655 ret = -EINVAL; 644 ret = -EINVAL;
656 if (!ac->secret) { 645 if (!ac->secret) {
657 pr_err("no secret set (for auth_x protocol)\n"); 646 pr_err("no secret set (for auth_x protocol)\n");
658 goto done_nomem; 647 goto out_nomem;
659 } 648 }
660 649
661 ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret); 650 ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret);
662 if (ret) 651 if (ret)
663 goto done_nomem; 652 goto out_nomem;
664 653
665 xi->starting = true; 654 xi->starting = true;
666 xi->ticket_handlers = RB_ROOT; 655 xi->ticket_handlers = RB_ROOT;
@@ -670,10 +659,9 @@ int ceph_x_init(struct ceph_auth_client *ac)
670 ac->ops = &ceph_x_ops; 659 ac->ops = &ceph_x_ops;
671 return 0; 660 return 0;
672 661
673done_nomem: 662out_nomem:
674 kfree(xi); 663 kfree(xi);
675 if (ceph_x_ticketbuf_cachep) 664out:
676 kmem_cache_destroy(ceph_x_ticketbuf_cachep);
677 return ret; 665 return ret;
678} 666}
679 667
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index aa2239fa9a3b..d9400534b279 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
858} 858}
859 859
860/* 860/*
861 * Remove a cap. Take steps to deal with a racing iterate_session_caps.
862 *
861 * caller should hold i_lock. 863 * caller should hold i_lock.
862 * caller will not hold session s_mutex if called from destroy_inode. 864 * caller will not hold session s_mutex if called from destroy_inode.
863 */ 865 */
@@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
866 struct ceph_mds_session *session = cap->session; 868 struct ceph_mds_session *session = cap->session;
867 struct ceph_inode_info *ci = cap->ci; 869 struct ceph_inode_info *ci = cap->ci;
868 struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc; 870 struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
871 int removed = 0;
869 872
870 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); 873 dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
871 874
872 /* remove from inode list */
873 rb_erase(&cap->ci_node, &ci->i_caps);
874 cap->ci = NULL;
875 if (ci->i_auth_cap == cap)
876 ci->i_auth_cap = NULL;
877
878 /* remove from session list */ 875 /* remove from session list */
879 spin_lock(&session->s_cap_lock); 876 spin_lock(&session->s_cap_lock);
880 if (session->s_cap_iterator == cap) { 877 if (session->s_cap_iterator == cap) {
@@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap)
885 list_del_init(&cap->session_caps); 882 list_del_init(&cap->session_caps);
886 session->s_nr_caps--; 883 session->s_nr_caps--;
887 cap->session = NULL; 884 cap->session = NULL;
885 removed = 1;
888 } 886 }
887 /* protect backpointer with s_cap_lock: see iterate_session_caps */
888 cap->ci = NULL;
889 spin_unlock(&session->s_cap_lock); 889 spin_unlock(&session->s_cap_lock);
890 890
891 if (cap->session == NULL) 891 /* remove from inode list */
892 rb_erase(&cap->ci_node, &ci->i_caps);
893 if (ci->i_auth_cap == cap)
894 ci->i_auth_cap = NULL;
895
896 if (removed)
892 ceph_put_cap(cap); 897 ceph_put_cap(cap);
893 898
894 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) { 899 if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
@@ -1861,8 +1866,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
1861 } else { 1866 } else {
1862 pr_err("%p auth cap %p not mds%d ???\n", inode, 1867 pr_err("%p auth cap %p not mds%d ???\n", inode,
1863 cap, session->s_mds); 1868 cap, session->s_mds);
1864 spin_unlock(&inode->i_lock);
1865 } 1869 }
1870 spin_unlock(&inode->i_lock);
1866 } 1871 }
1867} 1872}
1868 1873
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ea8ee2e526aa..650d2db5ed26 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -880,7 +880,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
880 * do_request, above). If there is no trace, we need 880 * do_request, above). If there is no trace, we need
881 * to do it here. 881 * to do it here.
882 */ 882 */
883
884 /* d_move screws up d_subdirs order */
885 ceph_i_clear(new_dir, CEPH_I_COMPLETE);
886
883 d_move(old_dentry, new_dentry); 887 d_move(old_dentry, new_dentry);
888
889 /* ensure target dentry is invalidated, despite
890 rehashing bug in vfs_rename_dir */
891 new_dentry->d_time = jiffies;
892 ceph_dentry(new_dentry)->lease_shared_gen = 0;
884 } 893 }
885 ceph_mdsc_put_request(req); 894 ceph_mdsc_put_request(req);
886 return err; 895 return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4add3d5da2c1..ed6f19721d6e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -665,7 +665,8 @@ more:
665 * throw out any page cache pages in this range. this 665 * throw out any page cache pages in this range. this
666 * may block. 666 * may block.
667 */ 667 */
668 truncate_inode_pages_range(inode->i_mapping, pos, pos+len); 668 truncate_inode_pages_range(inode->i_mapping, pos,
669 (pos+len) | (PAGE_CACHE_SIZE-1));
669 } else { 670 } else {
670 pages = alloc_page_vector(num_pages); 671 pages = alloc_page_vector(num_pages);
671 if (IS_ERR(pages)) { 672 if (IS_ERR(pages)) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 26f883c275e8..85b4d2ffdeba 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -733,6 +733,10 @@ no_change:
733 __ceph_get_fmode(ci, cap_fmode); 733 __ceph_get_fmode(ci, cap_fmode);
734 spin_unlock(&inode->i_lock); 734 spin_unlock(&inode->i_lock);
735 } 735 }
736 } else if (cap_fmode >= 0) {
737 pr_warning("mds issued no caps on %llx.%llx\n",
738 ceph_vinop(inode));
739 __ceph_get_fmode(ci, cap_fmode);
736 } 740 }
737 741
738 /* update delegation info? */ 742 /* update delegation info? */
@@ -997,6 +1001,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
997 dn, dn->d_name.len, dn->d_name.name); 1001 dn, dn->d_name.len, dn->d_name.name);
998 dout("fill_trace doing d_move %p -> %p\n", 1002 dout("fill_trace doing d_move %p -> %p\n",
999 req->r_old_dentry, dn); 1003 req->r_old_dentry, dn);
1004
1005 /* d_move screws up d_subdirs order */
1006 ceph_i_clear(dir, CEPH_I_COMPLETE);
1007
1000 d_move(req->r_old_dentry, dn); 1008 d_move(req->r_old_dentry, dn);
1001 dout(" src %p '%.*s' dst %p '%.*s'\n", 1009 dout(" src %p '%.*s' dst %p '%.*s'\n",
1002 req->r_old_dentry, 1010 req->r_old_dentry,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 60a9a4ae47be..24561a557e01 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
736} 736}
737 737
738/* 738/*
739 * Helper to safely iterate over all caps associated with a session. 739 * Helper to safely iterate over all caps associated with a session, with
740 * special care taken to handle a racing __ceph_remove_cap().
740 * 741 *
741 * caller must hold session s_mutex 742 * Caller must hold session s_mutex.
742 */ 743 */
743static int iterate_session_caps(struct ceph_mds_session *session, 744static int iterate_session_caps(struct ceph_mds_session *session,
744 int (*cb)(struct inode *, struct ceph_cap *, 745 int (*cb)(struct inode *, struct ceph_cap *,
@@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
2136 struct ceph_mds_session *session = NULL; 2137 struct ceph_mds_session *session = NULL;
2137 struct ceph_msg *reply; 2138 struct ceph_msg *reply;
2138 struct rb_node *p; 2139 struct rb_node *p;
2139 int err; 2140 int err = -ENOMEM;
2140 struct ceph_pagelist *pagelist; 2141 struct ceph_pagelist *pagelist;
2141 2142
2142 pr_info("reconnect to recovering mds%d\n", mds); 2143 pr_info("reconnect to recovering mds%d\n", mds);
@@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
2185 goto fail; 2186 goto fail;
2186 err = iterate_session_caps(session, encode_caps_cb, pagelist); 2187 err = iterate_session_caps(session, encode_caps_cb, pagelist);
2187 if (err < 0) 2188 if (err < 0)
2188 goto out; 2189 goto fail;
2189 2190
2190 /* 2191 /*
2191 * snaprealms. we provide mds with the ino, seq (version), and 2192 * snaprealms. we provide mds with the ino, seq (version), and
@@ -2213,28 +2214,31 @@ send:
2213 reply->nr_pages = calc_pages_for(0, pagelist->length); 2214 reply->nr_pages = calc_pages_for(0, pagelist->length);
2214 ceph_con_send(&session->s_con, reply); 2215 ceph_con_send(&session->s_con, reply);
2215 2216
2216 if (session) { 2217 session->s_state = CEPH_MDS_SESSION_OPEN;
2217 session->s_state = CEPH_MDS_SESSION_OPEN; 2218 mutex_unlock(&session->s_mutex);
2218 __wake_requests(mdsc, &session->s_waiting); 2219
2219 } 2220 mutex_lock(&mdsc->mutex);
2221 __wake_requests(mdsc, &session->s_waiting);
2222 mutex_unlock(&mdsc->mutex);
2223
2224 ceph_put_mds_session(session);
2220 2225
2221out:
2222 up_read(&mdsc->snap_rwsem); 2226 up_read(&mdsc->snap_rwsem);
2223 if (session) {
2224 mutex_unlock(&session->s_mutex);
2225 ceph_put_mds_session(session);
2226 }
2227 mutex_lock(&mdsc->mutex); 2227 mutex_lock(&mdsc->mutex);
2228 return; 2228 return;
2229 2229
2230fail: 2230fail:
2231 ceph_msg_put(reply); 2231 ceph_msg_put(reply);
2232 up_read(&mdsc->snap_rwsem);
2233 mutex_unlock(&session->s_mutex);
2234 ceph_put_mds_session(session);
2232fail_nomsg: 2235fail_nomsg:
2233 ceph_pagelist_release(pagelist); 2236 ceph_pagelist_release(pagelist);
2234 kfree(pagelist); 2237 kfree(pagelist);
2235fail_nopagelist: 2238fail_nopagelist:
2236 pr_err("ENOMEM preparing reconnect for mds%d\n", mds); 2239 pr_err("error %d preparing reconnect for mds%d\n", err, mds);
2237 goto out; 2240 mutex_lock(&mdsc->mutex);
2241 return;
2238} 2242}
2239 2243
2240 2244
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index cdaaa131add3..cd4fadb6491a 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con)
492 list_move_tail(&m->list_head, &con->out_sent); 492 list_move_tail(&m->list_head, &con->out_sent);
493 } 493 }
494 494
495 m->hdr.seq = cpu_to_le64(++con->out_seq); 495 /*
496 * only assign outgoing seq # if we haven't sent this message
497 * yet. if it is requeued, resend with it's original seq.
498 */
499 if (m->needs_out_seq) {
500 m->hdr.seq = cpu_to_le64(++con->out_seq);
501 m->needs_out_seq = false;
502 }
496 503
497 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", 504 dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
498 m, con->out_seq, le16_to_cpu(m->hdr.type), 505 m, con->out_seq, le16_to_cpu(m->hdr.type),
@@ -1334,6 +1341,7 @@ static int read_partial_message(struct ceph_connection *con)
1334 unsigned front_len, middle_len, data_len, data_off; 1341 unsigned front_len, middle_len, data_len, data_off;
1335 int datacrc = con->msgr->nocrc; 1342 int datacrc = con->msgr->nocrc;
1336 int skip; 1343 int skip;
1344 u64 seq;
1337 1345
1338 dout("read_partial_message con %p msg %p\n", con, m); 1346 dout("read_partial_message con %p msg %p\n", con, m);
1339 1347
@@ -1368,6 +1376,25 @@ static int read_partial_message(struct ceph_connection *con)
1368 return -EIO; 1376 return -EIO;
1369 data_off = le16_to_cpu(con->in_hdr.data_off); 1377 data_off = le16_to_cpu(con->in_hdr.data_off);
1370 1378
1379 /* verify seq# */
1380 seq = le64_to_cpu(con->in_hdr.seq);
1381 if ((s64)seq - (s64)con->in_seq < 1) {
1382 pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
1383 ENTITY_NAME(con->peer_name),
1384 pr_addr(&con->peer_addr.in_addr),
1385 seq, con->in_seq + 1);
1386 con->in_base_pos = -front_len - middle_len - data_len -
1387 sizeof(m->footer);
1388 con->in_tag = CEPH_MSGR_TAG_READY;
1389 con->in_seq++;
1390 return 0;
1391 } else if ((s64)seq - (s64)con->in_seq > 1) {
1392 pr_err("read_partial_message bad seq %lld expected %lld\n",
1393 seq, con->in_seq + 1);
1394 con->error_msg = "bad message sequence # for incoming message";
1395 return -EBADMSG;
1396 }
1397
1371 /* allocate message? */ 1398 /* allocate message? */
1372 if (!con->in_msg) { 1399 if (!con->in_msg) {
1373 dout("got hdr type %d front %d data %d\n", con->in_hdr.type, 1400 dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
@@ -1379,6 +1406,7 @@ static int read_partial_message(struct ceph_connection *con)
1379 con->in_base_pos = -front_len - middle_len - data_len - 1406 con->in_base_pos = -front_len - middle_len - data_len -
1380 sizeof(m->footer); 1407 sizeof(m->footer);
1381 con->in_tag = CEPH_MSGR_TAG_READY; 1408 con->in_tag = CEPH_MSGR_TAG_READY;
1409 con->in_seq++;
1382 return 0; 1410 return 0;
1383 } 1411 }
1384 if (IS_ERR(con->in_msg)) { 1412 if (IS_ERR(con->in_msg)) {
@@ -1965,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
1965 1993
1966 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); 1994 BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
1967 1995
1996 msg->needs_out_seq = true;
1997
1968 /* queue */ 1998 /* queue */
1969 mutex_lock(&con->mutex); 1999 mutex_lock(&con->mutex);
1970 BUG_ON(!list_empty(&msg->list_head)); 2000 BUG_ON(!list_empty(&msg->list_head));
@@ -2030,6 +2060,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
2030 ceph_msg_put(con->in_msg); 2060 ceph_msg_put(con->in_msg);
2031 con->in_msg = NULL; 2061 con->in_msg = NULL;
2032 con->in_tag = CEPH_MSGR_TAG_READY; 2062 con->in_tag = CEPH_MSGR_TAG_READY;
2063 con->in_seq++;
2033 } else { 2064 } else {
2034 dout("con_revoke_pages %p msg %p pages %p no-op\n", 2065 dout("con_revoke_pages %p msg %p pages %p no-op\n",
2035 con, con->in_msg, msg); 2066 con, con->in_msg, msg);
@@ -2063,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len,
2063 kref_init(&m->kref); 2094 kref_init(&m->kref);
2064 INIT_LIST_HEAD(&m->list_head); 2095 INIT_LIST_HEAD(&m->list_head);
2065 2096
2097 m->hdr.tid = 0;
2066 m->hdr.type = cpu_to_le16(type); 2098 m->hdr.type = cpu_to_le16(type);
2099 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
2100 m->hdr.version = 0;
2067 m->hdr.front_len = cpu_to_le32(front_len); 2101 m->hdr.front_len = cpu_to_le32(front_len);
2068 m->hdr.middle_len = 0; 2102 m->hdr.middle_len = 0;
2069 m->hdr.data_len = cpu_to_le32(page_len); 2103 m->hdr.data_len = cpu_to_le32(page_len);
2070 m->hdr.data_off = cpu_to_le16(page_off); 2104 m->hdr.data_off = cpu_to_le16(page_off);
2071 m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); 2105 m->hdr.reserved = 0;
2072 m->footer.front_crc = 0; 2106 m->footer.front_crc = 0;
2073 m->footer.middle_crc = 0; 2107 m->footer.middle_crc = 0;
2074 m->footer.data_crc = 0; 2108 m->footer.data_crc = 0;
2109 m->footer.flags = 0;
2075 m->front_max = front_len; 2110 m->front_max = front_len;
2076 m->front_is_vmalloc = false; 2111 m->front_is_vmalloc = false;
2077 m->more_to_follow = false; 2112 m->more_to_follow = false;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index a343dae73cdc..a5caf91cc971 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -86,6 +86,7 @@ struct ceph_msg {
86 struct kref kref; 86 struct kref kref;
87 bool front_is_vmalloc; 87 bool front_is_vmalloc;
88 bool more_to_follow; 88 bool more_to_follow;
89 bool needs_out_seq;
89 int front_max; 90 int front_max;
90 91
91 struct ceph_msgpool *pool; 92 struct ceph_msgpool *pool;
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index c7b4dedaace6..3514f71ff85f 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc,
565{ 565{
566 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; 566 struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
567 struct ceph_pg pgid; 567 struct ceph_pg pgid;
568 int o = -1; 568 int acting[CEPH_PG_MAX_SIZE];
569 int o = -1, num = 0;
569 int err; 570 int err;
570 571
571 dout("map_osds %p tid %lld\n", req, req->r_tid); 572 dout("map_osds %p tid %lld\n", req, req->r_tid);
@@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc,
576 pgid = reqhead->layout.ol_pgid; 577 pgid = reqhead->layout.ol_pgid;
577 req->r_pgid = pgid; 578 req->r_pgid = pgid;
578 579
579 o = ceph_calc_pg_primary(osdc->osdmap, pgid); 580 err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
581 if (err > 0) {
582 o = acting[0];
583 num = err;
584 }
580 585
581 if ((req->r_osd && req->r_osd->o_osd == o && 586 if ((req->r_osd && req->r_osd->o_osd == o &&
582 req->r_sent >= req->r_osd->o_incarnation) || 587 req->r_sent >= req->r_osd->o_incarnation &&
588 req->r_num_pg_osds == num &&
589 memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
583 (req->r_osd == NULL && o == -1)) 590 (req->r_osd == NULL && o == -1))
584 return 0; /* no change */ 591 return 0; /* no change */
585 592
@@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc,
587 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, 594 req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
588 req->r_osd ? req->r_osd->o_osd : -1); 595 req->r_osd ? req->r_osd->o_osd : -1);
589 596
597 /* record full pg acting set */
598 memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
599 req->r_num_pg_osds = num;
600
590 if (req->r_osd) { 601 if (req->r_osd) {
591 __cancel_request(req); 602 __cancel_request(req);
592 list_del_init(&req->r_osd_item); 603 list_del_init(&req->r_osd_item);
@@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
612 __remove_osd_from_lru(req->r_osd); 623 __remove_osd_from_lru(req->r_osd);
613 list_add(&req->r_osd_item, &req->r_osd->o_requests); 624 list_add(&req->r_osd_item, &req->r_osd->o_requests);
614 } 625 }
615 err = 1; /* osd changed */ 626 err = 1; /* osd or pg changed */
616 627
617out: 628out:
618 return err; 629 return err;
@@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
779 struct ceph_osd_request *req; 790 struct ceph_osd_request *req;
780 u64 tid; 791 u64 tid;
781 int numops, object_len, flags; 792 int numops, object_len, flags;
793 s32 result;
782 794
783 tid = le64_to_cpu(msg->hdr.tid); 795 tid = le64_to_cpu(msg->hdr.tid);
784 if (msg->front.iov_len < sizeof(*rhead)) 796 if (msg->front.iov_len < sizeof(*rhead))
785 goto bad; 797 goto bad;
786 numops = le32_to_cpu(rhead->num_ops); 798 numops = le32_to_cpu(rhead->num_ops);
787 object_len = le32_to_cpu(rhead->object_len); 799 object_len = le32_to_cpu(rhead->object_len);
800 result = le32_to_cpu(rhead->result);
788 if (msg->front.iov_len != sizeof(*rhead) + object_len + 801 if (msg->front.iov_len != sizeof(*rhead) + object_len +
789 numops * sizeof(struct ceph_osd_op)) 802 numops * sizeof(struct ceph_osd_op))
790 goto bad; 803 goto bad;
791 dout("handle_reply %p tid %llu\n", msg, tid); 804 dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
792 805
793 /* lookup */ 806 /* lookup */
794 mutex_lock(&osdc->request_mutex); 807 mutex_lock(&osdc->request_mutex);
@@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
834 dout("handle_reply tid %llu flags %d\n", tid, flags); 847 dout("handle_reply tid %llu flags %d\n", tid, flags);
835 848
836 /* either this is a read, or we got the safe response */ 849 /* either this is a read, or we got the safe response */
837 if ((flags & CEPH_OSD_FLAG_ONDISK) || 850 if (result < 0 ||
851 (flags & CEPH_OSD_FLAG_ONDISK) ||
838 ((flags & CEPH_OSD_FLAG_WRITE) == 0)) 852 ((flags & CEPH_OSD_FLAG_WRITE) == 0))
839 __unregister_request(osdc, req); 853 __unregister_request(osdc, req);
840 854
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index b0759911e7c3..ce776989ef6a 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -48,6 +48,8 @@ struct ceph_osd_request {
48 struct list_head r_osd_item; 48 struct list_head r_osd_item;
49 struct ceph_osd *r_osd; 49 struct ceph_osd *r_osd;
50 struct ceph_pg r_pgid; 50 struct ceph_pg r_pgid;
51 int r_pg_osds[CEPH_PG_MAX_SIZE];
52 int r_num_pg_osds;
51 53
52 struct ceph_connection *r_con_filling_msg; 54 struct ceph_connection *r_con_filling_msg;
53 55
@@ -66,7 +68,6 @@ struct ceph_osd_request {
66 struct list_head r_unsafe_item; 68 struct list_head r_unsafe_item;
67 69
68 struct inode *r_inode; /* for use by callbacks */ 70 struct inode *r_inode; /* for use by callbacks */
69 struct writeback_control *r_wbc; /* ditto */
70 71
71 char r_oid[40]; /* object name */ 72 char r_oid[40]; /* object name */
72 int r_oid_len; 73 int r_oid_len;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 2e2c15eed82a..cfdd8f4388b7 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1041} 1041}
1042 1042
1043/* 1043/*
1044 * Return acting set for given pgid.
1045 */
1046int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
1047 int *acting)
1048{
1049 int rawosds[CEPH_PG_MAX_SIZE], *osds;
1050 int i, o, num = CEPH_PG_MAX_SIZE;
1051
1052 osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
1053 if (!osds)
1054 return -1;
1055
1056 /* primary is first up osd */
1057 o = 0;
1058 for (i = 0; i < num; i++)
1059 if (ceph_osd_is_up(osdmap, osds[i]))
1060 acting[o++] = osds[i];
1061 return o;
1062}
1063
1064/*
1044 * Return primary osd for given pgid, or -1 if none. 1065 * Return primary osd for given pgid, or -1 if none.
1045 */ 1066 */
1046int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) 1067int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
1047{ 1068{
1048 int rawosds[10], *osds; 1069 int rawosds[CEPH_PG_MAX_SIZE], *osds;
1049 int i, num = ARRAY_SIZE(rawosds); 1070 int i, num = CEPH_PG_MAX_SIZE;
1050 1071
1051 osds = calc_pg_raw(osdmap, pgid, rawosds, &num); 1072 osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
1052 if (!osds) 1073 if (!osds)
@@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
1054 1075
1055 /* primary is first up osd */ 1076 /* primary is first up osd */
1056 for (i = 0; i < num; i++) 1077 for (i = 0; i < num; i++)
1057 if (ceph_osd_is_up(osdmap, osds[i])) { 1078 if (ceph_osd_is_up(osdmap, osds[i]))
1058 return osds[i]; 1079 return osds[i];
1059 break;
1060 }
1061 return -1; 1080 return -1;
1062} 1081}
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 8bc9f1e4f562..970b547e510d 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
120 const char *oid, 120 const char *oid,
121 struct ceph_file_layout *fl, 121 struct ceph_file_layout *fl,
122 struct ceph_osdmap *osdmap); 122 struct ceph_osdmap *osdmap);
123extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
124 int *acting);
123extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, 125extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
124 struct ceph_pg pgid); 126 struct ceph_pg pgid);
125 127
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index a1fc1d017b58..fd56451a871f 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -58,6 +58,7 @@ struct ceph_timespec {
58#define CEPH_PG_LAYOUT_LINEAR 2 58#define CEPH_PG_LAYOUT_LINEAR 2
59#define CEPH_PG_LAYOUT_HYBRID 3 59#define CEPH_PG_LAYOUT_HYBRID 3
60 60
61#define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */
61 62
62/* 63/*
63 * placement group. 64 * placement group.
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 2b881262ef67..d5114db70453 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -869,16 +869,20 @@ skip_inode:
869 continue; 869 continue;
870 ci = ceph_inode(inode); 870 ci = ceph_inode(inode);
871 spin_lock(&inode->i_lock); 871 spin_lock(&inode->i_lock);
872 if (!ci->i_snap_realm) 872 if (list_empty(&ci->i_snap_realm_item)) {
873 goto split_skip_inode; 873 struct ceph_snap_realm *oldrealm =
874 ceph_put_snap_realm(mdsc, ci->i_snap_realm); 874 ci->i_snap_realm;
875 spin_lock(&realm->inodes_with_caps_lock); 875
876 list_add(&ci->i_snap_realm_item, 876 dout(" moving %p to split realm %llx %p\n",
877 &realm->inodes_with_caps); 877 inode, realm->ino, realm);
878 ci->i_snap_realm = realm; 878 spin_lock(&realm->inodes_with_caps_lock);
879 spin_unlock(&realm->inodes_with_caps_lock); 879 list_add(&ci->i_snap_realm_item,
880 ceph_get_snap_realm(mdsc, realm); 880 &realm->inodes_with_caps);
881split_skip_inode: 881 ci->i_snap_realm = realm;
882 spin_unlock(&realm->inodes_with_caps_lock);
883 ceph_get_snap_realm(mdsc, realm);
884 ceph_put_snap_realm(mdsc, oldrealm);
885 }
882 spin_unlock(&inode->i_lock); 886 spin_unlock(&inode->i_lock);
883 iput(inode); 887 iput(inode);
884 } 888 }
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 75d02eaa1279..110857ba9269 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len)
47 */ 47 */
48static void ceph_put_super(struct super_block *s) 48static void ceph_put_super(struct super_block *s)
49{ 49{
50 struct ceph_client *cl = ceph_client(s); 50 struct ceph_client *client = ceph_sb_to_client(s);
51 51
52 dout("put_super\n"); 52 dout("put_super\n");
53 ceph_mdsc_close_sessions(&cl->mdsc); 53 ceph_mdsc_close_sessions(&client->mdsc);
54
55 /*
56 * ensure we release the bdi before put_anon_super releases
57 * the device name.
58 */
59 if (s->s_bdi == &client->backing_dev_info) {
60 bdi_unregister(&client->backing_dev_info);
61 s->s_bdi = NULL;
62 }
63
54 return; 64 return;
55} 65}
56 66
@@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client)
636 destroy_workqueue(client->pg_inv_wq); 646 destroy_workqueue(client->pg_inv_wq);
637 destroy_workqueue(client->trunc_wq); 647 destroy_workqueue(client->trunc_wq);
638 648
649 bdi_destroy(&client->backing_dev_info);
650
639 if (client->msgr) 651 if (client->msgr)
640 ceph_messenger_destroy(client->msgr); 652 ceph_messenger_destroy(client->msgr);
641 mempool_destroy(client->wb_pagevec_pool); 653 mempool_destroy(client->wb_pagevec_pool);
@@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
876{ 888{
877 int err; 889 int err;
878 890
879 sb->s_bdi = &client->backing_dev_info;
880
881 /* set ra_pages based on rsize mount option? */ 891 /* set ra_pages based on rsize mount option? */
882 if (client->mount_args->rsize >= PAGE_CACHE_SIZE) 892 if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
883 client->backing_dev_info.ra_pages = 893 client->backing_dev_info.ra_pages =
884 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) 894 (client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
885 >> PAGE_SHIFT; 895 >> PAGE_SHIFT;
886 err = bdi_register_dev(&client->backing_dev_info, sb->s_dev); 896 err = bdi_register_dev(&client->backing_dev_info, sb->s_dev);
897 if (!err)
898 sb->s_bdi = &client->backing_dev_info;
887 return err; 899 return err;
888} 900}
889 901
@@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s)
957 dout("kill_sb %p\n", s); 969 dout("kill_sb %p\n", s);
958 ceph_mdsc_pre_umount(&client->mdsc); 970 ceph_mdsc_pre_umount(&client->mdsc);
959 kill_anon_super(s); /* will call put_super after sb is r/o */ 971 kill_anon_super(s); /* will call put_super after sb is r/o */
960 if (s->s_bdi == &client->backing_dev_info)
961 bdi_unregister(&client->backing_dev_info);
962 bdi_destroy(&client->backing_dev_info);
963 ceph_destroy_client(client); 972 ceph_destroy_client(client);
964} 973}
965 974
@@ -996,9 +1005,10 @@ static int __init init_ceph(void)
996 if (ret) 1005 if (ret)
997 goto out_icache; 1006 goto out_icache;
998 1007
999 pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n", 1008 pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n",
1000 CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH, 1009 CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL,
1001 CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL); 1010 CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
1011 CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
1002 return 0; 1012 return 0;
1003 1013
1004out_icache: 1014out_icache:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index e30dfbb056c3..13513b80d87f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -10,6 +10,7 @@
10#include <linux/fs.h> 10#include <linux/fs.h>
11#include <linux/mempool.h> 11#include <linux/mempool.h>
12#include <linux/pagemap.h> 12#include <linux/pagemap.h>
13#include <linux/slab.h>
13#include <linux/wait.h> 14#include <linux/wait.h>
14#include <linux/writeback.h> 15#include <linux/writeback.h>
15#include <linux/slab.h> 16#include <linux/slab.h>
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ecf0ffbe2b64..0c2fd17439c8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -502,6 +502,7 @@ struct dfs_info3_param {
502#define CIFS_FATTR_DFS_REFERRAL 0x1 502#define CIFS_FATTR_DFS_REFERRAL 0x1
503#define CIFS_FATTR_DELETE_PENDING 0x2 503#define CIFS_FATTR_DELETE_PENDING 0x2
504#define CIFS_FATTR_NEED_REVAL 0x4 504#define CIFS_FATTR_NEED_REVAL 0x4
505#define CIFS_FATTR_INO_COLLISION 0x8
505 506
506struct cifs_fattr { 507struct cifs_fattr {
507 u32 cf_flags; 508 u32 cf_flags;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 35ec11716213..29b9ea244c81 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -715,6 +715,16 @@ cifs_find_inode(struct inode *inode, void *opaque)
715 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid) 715 if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
716 return 0; 716 return 0;
717 717
718 /*
719 * uh oh -- it's a directory. We can't use it since hardlinked dirs are
720 * verboten. Disable serverino and return it as if it were found, the
721 * caller can discard it, generate a uniqueid and retry the find
722 */
723 if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
724 fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
725 cifs_autodisable_serverino(CIFS_SB(inode->i_sb));
726 }
727
718 return 1; 728 return 1;
719} 729}
720 730
@@ -734,15 +744,22 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
734 unsigned long hash; 744 unsigned long hash;
735 struct inode *inode; 745 struct inode *inode;
736 746
747retry_iget5_locked:
737 cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid)); 748 cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid));
738 749
739 /* hash down to 32-bits on 32-bit arch */ 750 /* hash down to 32-bits on 32-bit arch */
740 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); 751 hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
741 752
742 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr); 753 inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
743
744 /* we have fattrs in hand, update the inode */
745 if (inode) { 754 if (inode) {
755 /* was there a problematic inode number collision? */
756 if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) {
757 iput(inode);
758 fattr->cf_uniqueid = iunique(sb, ROOT_I);
759 fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION;
760 goto retry_iget5_locked;
761 }
762
746 cifs_fattr_to_inode(inode, fattr); 763 cifs_fattr_to_inode(inode, fattr);
747 if (sb->s_flags & MS_NOATIME) 764 if (sb->s_flags & MS_NOATIME)
748 inode->i_flags |= S_NOATIME | S_NOCMTIME; 765 inode->i_flags |= S_NOATIME | S_NOCMTIME;
diff --git a/fs/compat.c b/fs/compat.c
index 4b6ed03cc478..05448730f840 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename,
1531 if (retval < 0) 1531 if (retval < 0)
1532 goto out; 1532 goto out;
1533 1533
1534 current->stack_start = current->mm->start_stack;
1535
1536 /* execve succeeded */ 1534 /* execve succeeded */
1537 current->fs->in_exec = 0; 1535 current->fs->in_exec = 0;
1538 current->in_execve = 0; 1536 current->in_execve = 0;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c32a1b6a856b..641640dc7ae5 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -102,7 +102,6 @@
102#include <linux/nbd.h> 102#include <linux/nbd.h>
103#include <linux/random.h> 103#include <linux/random.h>
104#include <linux/filter.h> 104#include <linux/filter.h>
105#include <linux/pktcdvd.h>
106 105
107#include <linux/hiddev.h> 106#include <linux/hiddev.h>
108 107
@@ -1126,8 +1125,6 @@ COMPATIBLE_IOCTL(PPGETMODE)
1126COMPATIBLE_IOCTL(PPGETPHASE) 1125COMPATIBLE_IOCTL(PPGETPHASE)
1127COMPATIBLE_IOCTL(PPGETFLAGS) 1126COMPATIBLE_IOCTL(PPGETFLAGS)
1128COMPATIBLE_IOCTL(PPSETFLAGS) 1127COMPATIBLE_IOCTL(PPSETFLAGS)
1129/* pktcdvd */
1130COMPATIBLE_IOCTL(PACKET_CTRL_CMD)
1131/* Big A */ 1128/* Big A */
1132/* sparc only */ 1129/* sparc only */
1133/* Big Q for sound/OSS */ 1130/* Big Q for sound/OSS */
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8e48b52205aa..0b502f80c691 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -645,6 +645,7 @@ static void detach_groups(struct config_group *group)
645 645
646 configfs_detach_group(sd->s_element); 646 configfs_detach_group(sd->s_element);
647 child->d_inode->i_flags |= S_DEAD; 647 child->d_inode->i_flags |= S_DEAD;
648 dont_mount(child);
648 649
649 mutex_unlock(&child->d_inode->i_mutex); 650 mutex_unlock(&child->d_inode->i_mutex);
650 651
@@ -840,6 +841,7 @@ static int configfs_attach_item(struct config_item *parent_item,
840 mutex_lock(&dentry->d_inode->i_mutex); 841 mutex_lock(&dentry->d_inode->i_mutex);
841 configfs_remove_dir(item); 842 configfs_remove_dir(item);
842 dentry->d_inode->i_flags |= S_DEAD; 843 dentry->d_inode->i_flags |= S_DEAD;
844 dont_mount(dentry);
843 mutex_unlock(&dentry->d_inode->i_mutex); 845 mutex_unlock(&dentry->d_inode->i_mutex);
844 d_delete(dentry); 846 d_delete(dentry);
845 } 847 }
@@ -882,6 +884,7 @@ static int configfs_attach_group(struct config_item *parent_item,
882 if (ret) { 884 if (ret) {
883 configfs_detach_item(item); 885 configfs_detach_item(item);
884 dentry->d_inode->i_flags |= S_DEAD; 886 dentry->d_inode->i_flags |= S_DEAD;
887 dont_mount(dentry);
885 } 888 }
886 configfs_adjust_dir_dirent_depth_after_populate(sd); 889 configfs_adjust_dir_dirent_depth_after_populate(sd);
887 mutex_unlock(&dentry->d_inode->i_mutex); 890 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -1725,6 +1728,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1725 mutex_unlock(&configfs_symlink_mutex); 1728 mutex_unlock(&configfs_symlink_mutex);
1726 configfs_detach_group(&group->cg_item); 1729 configfs_detach_group(&group->cg_item);
1727 dentry->d_inode->i_flags |= S_DEAD; 1730 dentry->d_inode->i_flags |= S_DEAD;
1731 dont_mount(dentry);
1728 mutex_unlock(&dentry->d_inode->i_mutex); 1732 mutex_unlock(&dentry->d_inode->i_mutex);
1729 1733
1730 d_delete(dentry); 1734 d_delete(dentry);
diff --git a/fs/exec.c b/fs/exec.c
index 49cdaa19e5b9..e6e94c626c2c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1387,8 +1387,6 @@ int do_execve(char * filename,
1387 if (retval < 0) 1387 if (retval < 0)
1388 goto out; 1388 goto out;
1389 1389
1390 current->stack_start = current->mm->start_stack;
1391
1392 /* execve succeeded */ 1390 /* execve succeeded */
1393 current->fs->in_exec = 0; 1391 current->fs->in_exec = 0;
1394 current->in_execve = 0; 1392 current->in_execve = 0;
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index 54373278a353..22721b2fd890 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -85,6 +85,7 @@ struct exofs_sb_info {
85 u32 s_next_generation; /* next gen # to use */ 85 u32 s_next_generation; /* next gen # to use */
86 atomic_t s_curr_pending; /* number of pending commands */ 86 atomic_t s_curr_pending; /* number of pending commands */
87 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */ 87 uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
88 struct backing_dev_info bdi; /* register our bdi with VFS */
88 89
89 struct pnfs_osd_data_map data_map; /* Default raid to use 90 struct pnfs_osd_data_map data_map; /* Default raid to use
90 * FIXME: Needed ? 91 * FIXME: Needed ?
@@ -93,7 +94,6 @@ struct exofs_sb_info {
93 struct exofs_layout layout; /* Default files layout, 94 struct exofs_layout layout; /* Default files layout,
94 * contains the variable osd_dev 95 * contains the variable osd_dev
95 * array. Keep last */ 96 * array. Keep last */
96 struct backing_dev_info bdi;
97 struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */ 97 struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */
98}; 98};
99 99
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 157382fa6256..b66832ac33ac 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -446,10 +446,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
446 /* initialize the mount flag and determine the default error handler */ 446 /* initialize the mount flag and determine the default error handler */
447 flag = JFS_ERR_REMOUNT_RO; 447 flag = JFS_ERR_REMOUNT_RO;
448 448
449 if (!parse_options((char *) data, sb, &newLVSize, &flag)) { 449 if (!parse_options((char *) data, sb, &newLVSize, &flag))
450 kfree(sbi); 450 goto out_kfree;
451 return -EINVAL;
452 }
453 sbi->flag = flag; 451 sbi->flag = flag;
454 452
455#ifdef CONFIG_JFS_POSIX_ACL 453#ifdef CONFIG_JFS_POSIX_ACL
@@ -458,7 +456,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
458 456
459 if (newLVSize) { 457 if (newLVSize) {
460 printk(KERN_ERR "resize option for remount only\n"); 458 printk(KERN_ERR "resize option for remount only\n");
461 return -EINVAL; 459 goto out_kfree;
462 } 460 }
463 461
464 /* 462 /*
@@ -478,7 +476,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
478 inode = new_inode(sb); 476 inode = new_inode(sb);
479 if (inode == NULL) { 477 if (inode == NULL) {
480 ret = -ENOMEM; 478 ret = -ENOMEM;
481 goto out_kfree; 479 goto out_unload;
482 } 480 }
483 inode->i_ino = 0; 481 inode->i_ino = 0;
484 inode->i_nlink = 1; 482 inode->i_nlink = 1;
@@ -550,9 +548,10 @@ out_mount_failed:
550 make_bad_inode(sbi->direct_inode); 548 make_bad_inode(sbi->direct_inode);
551 iput(sbi->direct_inode); 549 iput(sbi->direct_inode);
552 sbi->direct_inode = NULL; 550 sbi->direct_inode = NULL;
553out_kfree: 551out_unload:
554 if (sbi->nls_tab) 552 if (sbi->nls_tab)
555 unload_nls(sbi->nls_tab); 553 unload_nls(sbi->nls_tab);
554out_kfree:
556 kfree(sbi); 555 kfree(sbi);
557 return ret; 556 return ret;
558} 557}
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 2b2693851969..d651e10a1e9c 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -337,27 +337,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
337 goto fail; 337 goto fail;
338 338
339 sb->s_root = d_alloc_root(rootdir); 339 sb->s_root = d_alloc_root(rootdir);
340 if (!sb->s_root) 340 if (!sb->s_root) {
341 goto fail2; 341 iput(rootdir);
342 goto fail;
343 }
342 344
343 super->s_erase_page = alloc_pages(GFP_KERNEL, 0); 345 super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
344 if (!super->s_erase_page) 346 if (!super->s_erase_page)
345 goto fail2; 347 goto fail;
346 memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE); 348 memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
347 349
348 /* FIXME: check for read-only mounts */ 350 /* FIXME: check for read-only mounts */
349 err = logfs_make_writeable(sb); 351 err = logfs_make_writeable(sb);
350 if (err) 352 if (err)
351 goto fail3; 353 goto fail1;
352 354
353 log_super("LogFS: Finished mounting\n"); 355 log_super("LogFS: Finished mounting\n");
354 simple_set_mnt(mnt, sb); 356 simple_set_mnt(mnt, sb);
355 return 0; 357 return 0;
356 358
357fail3: 359fail1:
358 __free_page(super->s_erase_page); 360 __free_page(super->s_erase_page);
359fail2:
360 iput(rootdir);
361fail: 361fail:
362 iput(logfs_super(sb)->s_master_inode); 362 iput(logfs_super(sb)->s_master_inode);
363 return -EIO; 363 return -EIO;
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91a7e42..b86b96fe1dc3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
1641 if (nd->last.name[nd->last.len]) { 1641 if (nd->last.name[nd->last.len]) {
1642 if (open_flag & O_CREAT) 1642 if (open_flag & O_CREAT)
1643 goto exit; 1643 goto exit;
1644 nd->flags |= LOOKUP_DIRECTORY; 1644 nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
1645 } 1645 }
1646 1646
1647 /* just plain open? */ 1647 /* just plain open? */
@@ -1830,6 +1830,8 @@ reval:
1830 } 1830 }
1831 if (open_flag & O_DIRECTORY) 1831 if (open_flag & O_DIRECTORY)
1832 nd.flags |= LOOKUP_DIRECTORY; 1832 nd.flags |= LOOKUP_DIRECTORY;
1833 if (!(open_flag & O_NOFOLLOW))
1834 nd.flags |= LOOKUP_FOLLOW;
1833 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname); 1835 filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
1834 while (unlikely(!filp)) { /* trailing symlink */ 1836 while (unlikely(!filp)) { /* trailing symlink */
1835 struct path holder; 1837 struct path holder;
@@ -1837,7 +1839,7 @@ reval:
1837 void *cookie; 1839 void *cookie;
1838 error = -ELOOP; 1840 error = -ELOOP;
1839 /* S_ISDIR part is a temporary automount kludge */ 1841 /* S_ISDIR part is a temporary automount kludge */
1840 if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode)) 1842 if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
1841 goto exit_dput; 1843 goto exit_dput;
1842 if (count++ == 32) 1844 if (count++ == 32)
1843 goto exit_dput; 1845 goto exit_dput;
@@ -2174,8 +2176,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2174 error = security_inode_rmdir(dir, dentry); 2176 error = security_inode_rmdir(dir, dentry);
2175 if (!error) { 2177 if (!error) {
2176 error = dir->i_op->rmdir(dir, dentry); 2178 error = dir->i_op->rmdir(dir, dentry);
2177 if (!error) 2179 if (!error) {
2178 dentry->d_inode->i_flags |= S_DEAD; 2180 dentry->d_inode->i_flags |= S_DEAD;
2181 dont_mount(dentry);
2182 }
2179 } 2183 }
2180 } 2184 }
2181 mutex_unlock(&dentry->d_inode->i_mutex); 2185 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2259,7 +2263,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
2259 if (!error) { 2263 if (!error) {
2260 error = dir->i_op->unlink(dir, dentry); 2264 error = dir->i_op->unlink(dir, dentry);
2261 if (!error) 2265 if (!error)
2262 dentry->d_inode->i_flags |= S_DEAD; 2266 dont_mount(dentry);
2263 } 2267 }
2264 } 2268 }
2265 mutex_unlock(&dentry->d_inode->i_mutex); 2269 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2570,17 +2574,20 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2570 return error; 2574 return error;
2571 2575
2572 target = new_dentry->d_inode; 2576 target = new_dentry->d_inode;
2573 if (target) { 2577 if (target)
2574 mutex_lock(&target->i_mutex); 2578 mutex_lock(&target->i_mutex);
2575 dentry_unhash(new_dentry);
2576 }
2577 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) 2579 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2578 error = -EBUSY; 2580 error = -EBUSY;
2579 else 2581 else {
2582 if (target)
2583 dentry_unhash(new_dentry);
2580 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2584 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2585 }
2581 if (target) { 2586 if (target) {
2582 if (!error) 2587 if (!error) {
2583 target->i_flags |= S_DEAD; 2588 target->i_flags |= S_DEAD;
2589 dont_mount(new_dentry);
2590 }
2584 mutex_unlock(&target->i_mutex); 2591 mutex_unlock(&target->i_mutex);
2585 if (d_unhashed(new_dentry)) 2592 if (d_unhashed(new_dentry))
2586 d_rehash(new_dentry); 2593 d_rehash(new_dentry);
@@ -2612,7 +2619,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2612 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); 2619 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2613 if (!error) { 2620 if (!error) {
2614 if (target) 2621 if (target)
2615 target->i_flags |= S_DEAD; 2622 dont_mount(new_dentry);
2616 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) 2623 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2617 d_move(old_dentry, new_dentry); 2624 d_move(old_dentry, new_dentry);
2618 } 2625 }
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8ab5c70..f20cb57d1067 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1432,7 +1432,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
1432 1432
1433 err = -ENOENT; 1433 err = -ENOENT;
1434 mutex_lock(&path->dentry->d_inode->i_mutex); 1434 mutex_lock(&path->dentry->d_inode->i_mutex);
1435 if (IS_DEADDIR(path->dentry->d_inode)) 1435 if (cant_mount(path->dentry))
1436 goto out_unlock; 1436 goto out_unlock;
1437 1437
1438 err = security_sb_check_sb(mnt, path); 1438 err = security_sb_check_sb(mnt, path);
@@ -1623,7 +1623,7 @@ static int do_move_mount(struct path *path, char *old_name)
1623 1623
1624 err = -ENOENT; 1624 err = -ENOENT;
1625 mutex_lock(&path->dentry->d_inode->i_mutex); 1625 mutex_lock(&path->dentry->d_inode->i_mutex);
1626 if (IS_DEADDIR(path->dentry->d_inode)) 1626 if (cant_mount(path->dentry))
1627 goto out1; 1627 goto out1;
1628 1628
1629 if (d_unlinked(path->dentry)) 1629 if (d_unlinked(path->dentry))
@@ -2234,7 +2234,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
2234 if (!check_mnt(root.mnt)) 2234 if (!check_mnt(root.mnt))
2235 goto out2; 2235 goto out2;
2236 error = -ENOENT; 2236 error = -ENOENT;
2237 if (IS_DEADDIR(new.dentry->d_inode)) 2237 if (cant_mount(old.dentry))
2238 goto out2; 2238 goto out2;
2239 if (d_unlinked(new.dentry)) 2239 if (d_unlinked(new.dentry))
2240 goto out2; 2240 goto out2;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a8766c4ef2e0..acc9c4943b84 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -966,6 +966,8 @@ out_error:
966static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source) 966static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
967{ 967{
968 target->flags = source->flags; 968 target->flags = source->flags;
969 target->rsize = source->rsize;
970 target->wsize = source->wsize;
969 target->acregmin = source->acregmin; 971 target->acregmin = source->acregmin;
970 target->acregmax = source->acregmax; 972 target->acregmax = source->acregmax;
971 target->acdirmin = source->acdirmin; 973 target->acdirmin = source->acdirmin;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 15671245c6ee..ea61d26e7871 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -24,6 +24,8 @@
24 24
25static void nfs_do_free_delegation(struct nfs_delegation *delegation) 25static void nfs_do_free_delegation(struct nfs_delegation *delegation)
26{ 26{
27 if (delegation->cred)
28 put_rpccred(delegation->cred);
27 kfree(delegation); 29 kfree(delegation);
28} 30}
29 31
@@ -36,13 +38,7 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
36 38
37static void nfs_free_delegation(struct nfs_delegation *delegation) 39static void nfs_free_delegation(struct nfs_delegation *delegation)
38{ 40{
39 struct rpc_cred *cred;
40
41 cred = rcu_dereference(delegation->cred);
42 rcu_assign_pointer(delegation->cred, NULL);
43 call_rcu(&delegation->rcu, nfs_free_delegation_callback); 41 call_rcu(&delegation->rcu, nfs_free_delegation_callback);
44 if (cred)
45 put_rpccred(cred);
46} 42}
47 43
48void nfs_mark_delegation_referenced(struct nfs_delegation *delegation) 44void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
@@ -129,21 +125,35 @@ again:
129 */ 125 */
130void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) 126void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
131{ 127{
132 struct nfs_delegation *delegation = NFS_I(inode)->delegation; 128 struct nfs_delegation *delegation;
133 struct rpc_cred *oldcred; 129 struct rpc_cred *oldcred = NULL;
134 130
135 if (delegation == NULL) 131 rcu_read_lock();
136 return; 132 delegation = rcu_dereference(NFS_I(inode)->delegation);
137 memcpy(delegation->stateid.data, res->delegation.data, 133 if (delegation != NULL) {
138 sizeof(delegation->stateid.data)); 134 spin_lock(&delegation->lock);
139 delegation->type = res->delegation_type; 135 if (delegation->inode != NULL) {
140 delegation->maxsize = res->maxsize; 136 memcpy(delegation->stateid.data, res->delegation.data,
141 oldcred = delegation->cred; 137 sizeof(delegation->stateid.data));
142 delegation->cred = get_rpccred(cred); 138 delegation->type = res->delegation_type;
143 clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags); 139 delegation->maxsize = res->maxsize;
144 NFS_I(inode)->delegation_state = delegation->type; 140 oldcred = delegation->cred;
145 smp_wmb(); 141 delegation->cred = get_rpccred(cred);
146 put_rpccred(oldcred); 142 clear_bit(NFS_DELEGATION_NEED_RECLAIM,
143 &delegation->flags);
144 NFS_I(inode)->delegation_state = delegation->type;
145 spin_unlock(&delegation->lock);
146 put_rpccred(oldcred);
147 rcu_read_unlock();
148 } else {
149 /* We appear to have raced with a delegation return. */
150 spin_unlock(&delegation->lock);
151 rcu_read_unlock();
152 nfs_inode_set_delegation(inode, cred, res);
153 }
154 } else {
155 rcu_read_unlock();
156 }
147} 157}
148 158
149static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync) 159static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
@@ -166,9 +176,13 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
166 return inode; 176 return inode;
167} 177}
168 178
169static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid) 179static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi,
180 const nfs4_stateid *stateid,
181 struct nfs_client *clp)
170{ 182{
171 struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation); 183 struct nfs_delegation *delegation =
184 rcu_dereference_protected(nfsi->delegation,
185 lockdep_is_held(&clp->cl_lock));
172 186
173 if (delegation == NULL) 187 if (delegation == NULL)
174 goto nomatch; 188 goto nomatch;
@@ -195,7 +209,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
195{ 209{
196 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 210 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
197 struct nfs_inode *nfsi = NFS_I(inode); 211 struct nfs_inode *nfsi = NFS_I(inode);
198 struct nfs_delegation *delegation; 212 struct nfs_delegation *delegation, *old_delegation;
199 struct nfs_delegation *freeme = NULL; 213 struct nfs_delegation *freeme = NULL;
200 int status = 0; 214 int status = 0;
201 215
@@ -213,10 +227,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
213 spin_lock_init(&delegation->lock); 227 spin_lock_init(&delegation->lock);
214 228
215 spin_lock(&clp->cl_lock); 229 spin_lock(&clp->cl_lock);
216 if (rcu_dereference(nfsi->delegation) != NULL) { 230 old_delegation = rcu_dereference_protected(nfsi->delegation,
217 if (memcmp(&delegation->stateid, &nfsi->delegation->stateid, 231 lockdep_is_held(&clp->cl_lock));
218 sizeof(delegation->stateid)) == 0 && 232 if (old_delegation != NULL) {
219 delegation->type == nfsi->delegation->type) { 233 if (memcmp(&delegation->stateid, &old_delegation->stateid,
234 sizeof(old_delegation->stateid)) == 0 &&
235 delegation->type == old_delegation->type) {
220 goto out; 236 goto out;
221 } 237 }
222 /* 238 /*
@@ -226,12 +242,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
226 dfprintk(FILE, "%s: server %s handed out " 242 dfprintk(FILE, "%s: server %s handed out "
227 "a duplicate delegation!\n", 243 "a duplicate delegation!\n",
228 __func__, clp->cl_hostname); 244 __func__, clp->cl_hostname);
229 if (delegation->type <= nfsi->delegation->type) { 245 if (delegation->type <= old_delegation->type) {
230 freeme = delegation; 246 freeme = delegation;
231 delegation = NULL; 247 delegation = NULL;
232 goto out; 248 goto out;
233 } 249 }
234 freeme = nfs_detach_delegation_locked(nfsi, NULL); 250 freeme = nfs_detach_delegation_locked(nfsi, NULL, clp);
235 } 251 }
236 list_add_rcu(&delegation->super_list, &clp->cl_delegations); 252 list_add_rcu(&delegation->super_list, &clp->cl_delegations);
237 nfsi->delegation_state = delegation->type; 253 nfsi->delegation_state = delegation->type;
@@ -301,7 +317,7 @@ restart:
301 if (inode == NULL) 317 if (inode == NULL)
302 continue; 318 continue;
303 spin_lock(&clp->cl_lock); 319 spin_lock(&clp->cl_lock);
304 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); 320 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
305 spin_unlock(&clp->cl_lock); 321 spin_unlock(&clp->cl_lock);
306 rcu_read_unlock(); 322 rcu_read_unlock();
307 if (delegation != NULL) { 323 if (delegation != NULL) {
@@ -330,9 +346,9 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
330 struct nfs_inode *nfsi = NFS_I(inode); 346 struct nfs_inode *nfsi = NFS_I(inode);
331 struct nfs_delegation *delegation; 347 struct nfs_delegation *delegation;
332 348
333 if (rcu_dereference(nfsi->delegation) != NULL) { 349 if (rcu_access_pointer(nfsi->delegation) != NULL) {
334 spin_lock(&clp->cl_lock); 350 spin_lock(&clp->cl_lock);
335 delegation = nfs_detach_delegation_locked(nfsi, NULL); 351 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
336 spin_unlock(&clp->cl_lock); 352 spin_unlock(&clp->cl_lock);
337 if (delegation != NULL) 353 if (delegation != NULL)
338 nfs_do_return_delegation(inode, delegation, 0); 354 nfs_do_return_delegation(inode, delegation, 0);
@@ -346,9 +362,9 @@ int nfs_inode_return_delegation(struct inode *inode)
346 struct nfs_delegation *delegation; 362 struct nfs_delegation *delegation;
347 int err = 0; 363 int err = 0;
348 364
349 if (rcu_dereference(nfsi->delegation) != NULL) { 365 if (rcu_access_pointer(nfsi->delegation) != NULL) {
350 spin_lock(&clp->cl_lock); 366 spin_lock(&clp->cl_lock);
351 delegation = nfs_detach_delegation_locked(nfsi, NULL); 367 delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
352 spin_unlock(&clp->cl_lock); 368 spin_unlock(&clp->cl_lock);
353 if (delegation != NULL) { 369 if (delegation != NULL) {
354 nfs_msync_inode(inode); 370 nfs_msync_inode(inode);
@@ -526,7 +542,7 @@ restart:
526 if (inode == NULL) 542 if (inode == NULL)
527 continue; 543 continue;
528 spin_lock(&clp->cl_lock); 544 spin_lock(&clp->cl_lock);
529 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL); 545 delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
530 spin_unlock(&clp->cl_lock); 546 spin_unlock(&clp->cl_lock);
531 rcu_read_unlock(); 547 rcu_read_unlock();
532 if (delegation != NULL) 548 if (delegation != NULL)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index db3ad849a289..a7bb5c694aa3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1052,7 +1052,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1052 struct inode *dir; 1052 struct inode *dir;
1053 int openflags, ret = 0; 1053 int openflags, ret = 0;
1054 1054
1055 if (!is_atomic_open(nd)) 1055 if (!is_atomic_open(nd) || d_mountpoint(dentry))
1056 goto no_open; 1056 goto no_open;
1057 parent = dget_parent(dentry); 1057 parent = dget_parent(dentry);
1058 dir = parent->d_inode; 1058 dir = parent->d_inode;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 638067007c65..071fcedd517c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5218,9 +5218,12 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5218 msg.rpc_resp = &calldata->res; 5218 msg.rpc_resp = &calldata->res;
5219 task_setup_data.callback_data = calldata; 5219 task_setup_data.callback_data = calldata;
5220 task = rpc_run_task(&task_setup_data); 5220 task = rpc_run_task(&task_setup_data);
5221 if (IS_ERR(task)) 5221 if (IS_ERR(task)) {
5222 status = PTR_ERR(task); 5222 status = PTR_ERR(task);
5223 goto out;
5224 }
5223 rpc_put_task(task); 5225 rpc_put_task(task);
5226 return 0;
5224out: 5227out:
5225 dprintk("<-- %s status=%d\n", __func__, status); 5228 dprintk("<-- %s status=%d\n", __func__, status);
5226 return status; 5229 return status;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index e01637240eeb..b4148fc00f9f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2187,6 +2187,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2187 if (data->version == 4) { 2187 if (data->version == 4) {
2188 error = nfs4_try_mount(flags, dev_name, data, mnt); 2188 error = nfs4_try_mount(flags, dev_name, data, mnt);
2189 kfree(data->client_address); 2189 kfree(data->client_address);
2190 kfree(data->nfs_server.export_path);
2190 goto out; 2191 goto out;
2191 } 2192 }
2192#endif /* CONFIG_NFS_V4 */ 2193#endif /* CONFIG_NFS_V4 */
@@ -2657,7 +2658,7 @@ static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
2657 devname = nfs_path(path->mnt->mnt_devname, 2658 devname = nfs_path(path->mnt->mnt_devname,
2658 path->mnt->mnt_root, path->dentry, 2659 path->mnt->mnt_root, path->dentry,
2659 page, PAGE_SIZE); 2660 page, PAGE_SIZE);
2660 if (devname == NULL) 2661 if (IS_ERR(devname))
2661 goto out_freepage; 2662 goto out_freepage;
2662 tmp = kstrdup(devname, GFP_KERNEL); 2663 tmp = kstrdup(devname, GFP_KERNEL);
2663 if (tmp == NULL) 2664 if (tmp == NULL)
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index de38d63aa920..3aea3ca98ab7 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1201,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1201 1201
1202 1202
1203#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1203#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1204static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1205{
1206 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1207 return 1;
1208 if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
1209 NFS_INO_COMMIT, nfs_wait_bit_killable,
1210 TASK_KILLABLE))
1211 return 1;
1212 return 0;
1213}
1214
1215static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1216{
1217 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1218 smp_mb__after_clear_bit();
1219 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1220}
1221
1222
1204static void nfs_commitdata_release(void *data) 1223static void nfs_commitdata_release(void *data)
1205{ 1224{
1206 struct nfs_write_data *wdata = data; 1225 struct nfs_write_data *wdata = data;
@@ -1262,8 +1281,6 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1262 task = rpc_run_task(&task_setup_data); 1281 task = rpc_run_task(&task_setup_data);
1263 if (IS_ERR(task)) 1282 if (IS_ERR(task))
1264 return PTR_ERR(task); 1283 return PTR_ERR(task);
1265 if (how & FLUSH_SYNC)
1266 rpc_wait_for_completion_task(task);
1267 rpc_put_task(task); 1284 rpc_put_task(task);
1268 return 0; 1285 return 0;
1269} 1286}
@@ -1294,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1294 BDI_RECLAIMABLE); 1311 BDI_RECLAIMABLE);
1295 nfs_clear_page_tag_locked(req); 1312 nfs_clear_page_tag_locked(req);
1296 } 1313 }
1314 nfs_commit_clear_lock(NFS_I(inode));
1297 return -ENOMEM; 1315 return -ENOMEM;
1298} 1316}
1299 1317
@@ -1349,6 +1367,7 @@ static void nfs_commit_release(void *calldata)
1349 next: 1367 next:
1350 nfs_clear_page_tag_locked(req); 1368 nfs_clear_page_tag_locked(req);
1351 } 1369 }
1370 nfs_commit_clear_lock(NFS_I(data->inode));
1352 nfs_commitdata_release(calldata); 1371 nfs_commitdata_release(calldata);
1353} 1372}
1354 1373
@@ -1363,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
1363static int nfs_commit_inode(struct inode *inode, int how) 1382static int nfs_commit_inode(struct inode *inode, int how)
1364{ 1383{
1365 LIST_HEAD(head); 1384 LIST_HEAD(head);
1366 int res; 1385 int may_wait = how & FLUSH_SYNC;
1386 int res = 0;
1367 1387
1388 if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
1389 goto out;
1368 spin_lock(&inode->i_lock); 1390 spin_lock(&inode->i_lock);
1369 res = nfs_scan_commit(inode, &head, 0, 0); 1391 res = nfs_scan_commit(inode, &head, 0, 0);
1370 spin_unlock(&inode->i_lock); 1392 spin_unlock(&inode->i_lock);
@@ -1372,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how)
1372 int error = nfs_commit_list(inode, &head, how); 1394 int error = nfs_commit_list(inode, &head, how);
1373 if (error < 0) 1395 if (error < 0)
1374 return error; 1396 return error;
1375 } 1397 if (may_wait)
1398 wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
1399 nfs_wait_bit_killable,
1400 TASK_KILLABLE);
1401 } else
1402 nfs_commit_clear_lock(NFS_I(inode));
1403out:
1376 return res; 1404 return res;
1377} 1405}
1378 1406
@@ -1444,6 +1472,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1444 1472
1445 BUG_ON(!PageLocked(page)); 1473 BUG_ON(!PageLocked(page));
1446 for (;;) { 1474 for (;;) {
1475 wait_on_page_writeback(page);
1447 req = nfs_page_find_request(page); 1476 req = nfs_page_find_request(page);
1448 if (req == NULL) 1477 if (req == NULL)
1449 break; 1478 break;
@@ -1478,30 +1507,18 @@ int nfs_wb_page(struct inode *inode, struct page *page)
1478 .range_start = range_start, 1507 .range_start = range_start,
1479 .range_end = range_end, 1508 .range_end = range_end,
1480 }; 1509 };
1481 struct nfs_page *req;
1482 int need_commit;
1483 int ret; 1510 int ret;
1484 1511
1485 while(PagePrivate(page)) { 1512 while(PagePrivate(page)) {
1513 wait_on_page_writeback(page);
1486 if (clear_page_dirty_for_io(page)) { 1514 if (clear_page_dirty_for_io(page)) {
1487 ret = nfs_writepage_locked(page, &wbc); 1515 ret = nfs_writepage_locked(page, &wbc);
1488 if (ret < 0) 1516 if (ret < 0)
1489 goto out_error; 1517 goto out_error;
1490 } 1518 }
1491 req = nfs_find_and_lock_request(page); 1519 ret = sync_inode(inode, &wbc);
1492 if (!req) 1520 if (ret < 0)
1493 break;
1494 if (IS_ERR(req)) {
1495 ret = PTR_ERR(req);
1496 goto out_error; 1521 goto out_error;
1497 }
1498 need_commit = test_bit(PG_CLEAN, &req->wb_flags);
1499 nfs_clear_page_tag_locked(req);
1500 if (need_commit) {
1501 ret = nfs_commit_inode(inode, FLUSH_SYNC);
1502 if (ret < 0)
1503 goto out_error;
1504 }
1505 } 1522 }
1506 return 0; 1523 return 0;
1507out_error: 1524out_error:
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0cdbc5e7655a..48145f505a6a 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -749,6 +749,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
749 sb->s_export_op = &nilfs_export_ops; 749 sb->s_export_op = &nilfs_export_ops;
750 sb->s_root = NULL; 750 sb->s_root = NULL;
751 sb->s_time_gran = 1; 751 sb->s_time_gran = 1;
752 sb->s_bdi = nilfs->ns_bdi;
752 753
753 err = load_nilfs(nilfs, sbi); 754 err = load_nilfs(nilfs, sbi);
754 if (err) 755 if (err)
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 3e56dbffe729..b3a159b21cfd 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -15,6 +15,7 @@ config INOTIFY
15 15
16config INOTIFY_USER 16config INOTIFY_USER
17 bool "Inotify support for userspace" 17 bool "Inotify support for userspace"
18 select ANON_INODES
18 select FSNOTIFY 19 select FSNOTIFY
19 default y 20 default y
20 ---help--- 21 ---help---
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 1afb0a10229f..e27960cd76ab 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -28,6 +28,7 @@
28#include <linux/path.h> /* struct path */ 28#include <linux/path.h> /* struct path */
29#include <linux/slab.h> /* kmem_* */ 29#include <linux/slab.h> /* kmem_* */
30#include <linux/types.h> 30#include <linux/types.h>
31#include <linux/sched.h>
31 32
32#include "inotify.h" 33#include "inotify.h"
33 34
@@ -146,6 +147,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
146 idr_for_each(&group->inotify_data.idr, idr_callback, group); 147 idr_for_each(&group->inotify_data.idr, idr_callback, group);
147 idr_remove_all(&group->inotify_data.idr); 148 idr_remove_all(&group->inotify_data.idr);
148 idr_destroy(&group->inotify_data.idr); 149 idr_destroy(&group->inotify_data.idr);
150 free_uid(group->inotify_data.user);
149} 151}
150 152
151void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) 153void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 472cdf29ef82..e46ca685b9be 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -546,21 +546,24 @@ retry:
546 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) 546 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
547 goto out_err; 547 goto out_err;
548 548
549 /* we are putting the mark on the idr, take a reference */
550 fsnotify_get_mark(&tmp_ientry->fsn_entry);
551
549 spin_lock(&group->inotify_data.idr_lock); 552 spin_lock(&group->inotify_data.idr_lock);
550 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, 553 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
551 group->inotify_data.last_wd+1, 554 group->inotify_data.last_wd+1,
552 &tmp_ientry->wd); 555 &tmp_ientry->wd);
553 spin_unlock(&group->inotify_data.idr_lock); 556 spin_unlock(&group->inotify_data.idr_lock);
554 if (ret) { 557 if (ret) {
558 /* we didn't get on the idr, drop the idr reference */
559 fsnotify_put_mark(&tmp_ientry->fsn_entry);
560
555 /* idr was out of memory allocate and try again */ 561 /* idr was out of memory allocate and try again */
556 if (ret == -EAGAIN) 562 if (ret == -EAGAIN)
557 goto retry; 563 goto retry;
558 goto out_err; 564 goto out_err;
559 } 565 }
560 566
561 /* we put the mark on the idr, take a reference */
562 fsnotify_get_mark(&tmp_ientry->fsn_entry);
563
564 /* we are on the idr, now get on the inode */ 567 /* we are on the idr, now get on the inode */
565 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); 568 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
566 if (ret) { 569 if (ret) {
@@ -578,16 +581,13 @@ retry:
578 /* return the watch descriptor for this new entry */ 581 /* return the watch descriptor for this new entry */
579 ret = tmp_ientry->wd; 582 ret = tmp_ientry->wd;
580 583
581 /* match the ref from fsnotify_init_markentry() */
582 fsnotify_put_mark(&tmp_ientry->fsn_entry);
583
584 /* if this mark added a new event update the group mask */ 584 /* if this mark added a new event update the group mask */
585 if (mask & ~group->mask) 585 if (mask & ~group->mask)
586 fsnotify_recalc_group_mask(group); 586 fsnotify_recalc_group_mask(group);
587 587
588out_err: 588out_err:
589 if (ret < 0) 589 /* match the ref from fsnotify_init_markentry() */
590 kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry); 590 fsnotify_put_mark(&tmp_ientry->fsn_entry);
591 591
592 return ret; 592 return ret;
593} 593}
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index ecebb2276790..f9d5d3ffc75a 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -406,6 +406,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
406 struct buffer_head *bh) 406 struct buffer_head *bh)
407{ 407{
408 int ret = 0; 408 int ret = 0;
409 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
409 410
410 mlog_entry_void(); 411 mlog_entry_void();
411 412
@@ -425,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
425 426
426 get_bh(bh); /* for end_buffer_write_sync() */ 427 get_bh(bh); /* for end_buffer_write_sync() */
427 bh->b_end_io = end_buffer_write_sync; 428 bh->b_end_io = end_buffer_write_sync;
429 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
428 submit_bh(WRITE, bh); 430 submit_bh(WRITE, bh);
429 431
430 wait_on_buffer(bh); 432 wait_on_buffer(bh);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index a795eb91f4ea..12d5eb78a11a 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -184,9 +184,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
184 BUG_ON(!lksb); 184 BUG_ON(!lksb);
185 185
186 /* only updates if this node masters the lockres */ 186 /* only updates if this node masters the lockres */
187 spin_lock(&res->spinlock);
187 if (res->owner == dlm->node_num) { 188 if (res->owner == dlm->node_num) {
188
189 spin_lock(&res->spinlock);
190 /* check the lksb flags for the direction */ 189 /* check the lksb flags for the direction */
191 if (lksb->flags & DLM_LKSB_GET_LVB) { 190 if (lksb->flags & DLM_LKSB_GET_LVB) {
192 mlog(0, "getting lvb from lockres for %s node\n", 191 mlog(0, "getting lvb from lockres for %s node\n",
@@ -201,8 +200,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
201 * here. In the future we might want to clear it at the time 200 * here. In the future we might want to clear it at the time
202 * the put is actually done. 201 * the put is actually done.
203 */ 202 */
204 spin_unlock(&res->spinlock);
205 } 203 }
204 spin_unlock(&res->spinlock);
206 205
207 /* reset any lvb flags on the lksb */ 206 /* reset any lvb flags on the lksb */
208 lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB); 207 lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 1b0de157a08c..b83d6107a1f5 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -112,20 +112,20 @@ MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES);
112 * O_RDONLY -> PRMODE level 112 * O_RDONLY -> PRMODE level
113 * O_WRONLY -> EXMODE level 113 * O_WRONLY -> EXMODE level
114 * 114 *
115 * O_NONBLOCK -> LKM_NOQUEUE 115 * O_NONBLOCK -> NOQUEUE
116 */ 116 */
117static int dlmfs_decode_open_flags(int open_flags, 117static int dlmfs_decode_open_flags(int open_flags,
118 int *level, 118 int *level,
119 int *flags) 119 int *flags)
120{ 120{
121 if (open_flags & (O_WRONLY|O_RDWR)) 121 if (open_flags & (O_WRONLY|O_RDWR))
122 *level = LKM_EXMODE; 122 *level = DLM_LOCK_EX;
123 else 123 else
124 *level = LKM_PRMODE; 124 *level = DLM_LOCK_PR;
125 125
126 *flags = 0; 126 *flags = 0;
127 if (open_flags & O_NONBLOCK) 127 if (open_flags & O_NONBLOCK)
128 *flags |= LKM_NOQUEUE; 128 *flags |= DLM_LKF_NOQUEUE;
129 129
130 return 0; 130 return 0;
131} 131}
@@ -166,7 +166,7 @@ static int dlmfs_file_open(struct inode *inode,
166 * to be able userspace to be able to distinguish a 166 * to be able userspace to be able to distinguish a
167 * valid lock request from one that simply couldn't be 167 * valid lock request from one that simply couldn't be
168 * granted. */ 168 * granted. */
169 if (flags & LKM_NOQUEUE && status == -EAGAIN) 169 if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN)
170 status = -ETXTBSY; 170 status = -ETXTBSY;
171 kfree(fp); 171 kfree(fp);
172 goto bail; 172 goto bail;
@@ -193,7 +193,7 @@ static int dlmfs_file_release(struct inode *inode,
193 status = 0; 193 status = 0;
194 if (fp) { 194 if (fp) {
195 level = fp->fp_lock_level; 195 level = fp->fp_lock_level;
196 if (level != LKM_IVMODE) 196 if (level != DLM_LOCK_IV)
197 user_dlm_cluster_unlock(&ip->ip_lockres, level); 197 user_dlm_cluster_unlock(&ip->ip_lockres, level);
198 198
199 kfree(fp); 199 kfree(fp);
@@ -262,7 +262,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
262 if ((count + *ppos) > i_size_read(inode)) 262 if ((count + *ppos) > i_size_read(inode))
263 readlen = i_size_read(inode) - *ppos; 263 readlen = i_size_read(inode) - *ppos;
264 else 264 else
265 readlen = count - *ppos; 265 readlen = count;
266 266
267 lvb_buf = kmalloc(readlen, GFP_NOFS); 267 lvb_buf = kmalloc(readlen, GFP_NOFS);
268 if (!lvb_buf) 268 if (!lvb_buf)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 17947dc8341e..a5fbd9cea968 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -684,6 +684,7 @@ restarted_transaction:
684 if (why == RESTART_META) { 684 if (why == RESTART_META) {
685 mlog(0, "restarting function.\n"); 685 mlog(0, "restarting function.\n");
686 restart_func = 1; 686 restart_func = 1;
687 status = 0;
687 } else { 688 } else {
688 BUG_ON(why != RESTART_TRANS); 689 BUG_ON(why != RESTART_TRANS);
689 690
@@ -1981,18 +1982,18 @@ relock:
1981 /* communicate with ocfs2_dio_end_io */ 1982 /* communicate with ocfs2_dio_end_io */
1982 ocfs2_iocb_set_rw_locked(iocb, rw_level); 1983 ocfs2_iocb_set_rw_locked(iocb, rw_level);
1983 1984
1984 if (direct_io) { 1985 ret = generic_segment_checks(iov, &nr_segs, &ocount,
1985 ret = generic_segment_checks(iov, &nr_segs, &ocount, 1986 VERIFY_READ);
1986 VERIFY_READ); 1987 if (ret)
1987 if (ret) 1988 goto out_dio;
1988 goto out_dio;
1989 1989
1990 count = ocount; 1990 count = ocount;
1991 ret = generic_write_checks(file, ppos, &count, 1991 ret = generic_write_checks(file, ppos, &count,
1992 S_ISBLK(inode->i_mode)); 1992 S_ISBLK(inode->i_mode));
1993 if (ret) 1993 if (ret)
1994 goto out_dio; 1994 goto out_dio;
1995 1995
1996 if (direct_io) {
1996 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, 1997 written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
1997 ppos, count, ocount); 1998 ppos, count, ocount);
1998 if (written < 0) { 1999 if (written < 0) {
@@ -2007,7 +2008,10 @@ relock:
2007 goto out_dio; 2008 goto out_dio;
2008 } 2009 }
2009 } else { 2010 } else {
2010 written = __generic_file_aio_write(iocb, iov, nr_segs, ppos); 2011 current->backing_dev_info = file->f_mapping->backing_dev_info;
2012 written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
2013 ppos, count, 0);
2014 current->backing_dev_info = NULL;
2011 } 2015 }
2012 2016
2013out_dio: 2017out_dio:
@@ -2021,9 +2025,9 @@ out_dio:
2021 if (ret < 0) 2025 if (ret < 0)
2022 written = ret; 2026 written = ret;
2023 2027
2024 if (!ret && (old_size != i_size_read(inode) || 2028 if (!ret && ((old_size != i_size_read(inode)) ||
2025 old_clusters != OCFS2_I(inode)->ip_clusters || 2029 (old_clusters != OCFS2_I(inode)->ip_clusters) ||
2026 has_refcount)) { 2030 has_refcount)) {
2027 ret = jbd2_journal_force_commit(osb->journal->j_journal); 2031 ret = jbd2_journal_force_commit(osb->journal->j_journal);
2028 if (ret < 0) 2032 if (ret < 0)
2029 written = ret; 2033 written = ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 07cc8bb68b6d..af189887201c 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -558,6 +558,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
558 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 558 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
559 if (IS_ERR(handle)) { 559 if (IS_ERR(handle)) {
560 status = PTR_ERR(handle); 560 status = PTR_ERR(handle);
561 handle = NULL;
561 mlog_errno(status); 562 mlog_errno(status);
562 goto out; 563 goto out;
563 } 564 }
@@ -639,11 +640,13 @@ static int ocfs2_remove_inode(struct inode *inode,
639 goto bail_unlock; 640 goto bail_unlock;
640 } 641 }
641 642
642 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, 643 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
643 orphan_dir_bh); 644 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
644 if (status < 0) { 645 orphan_dir_bh);
645 mlog_errno(status); 646 if (status < 0) {
646 goto bail_commit; 647 mlog_errno(status);
648 goto bail_commit;
649 }
647 } 650 }
648 651
649 /* set the inodes dtime */ 652 /* set the inodes dtime */
@@ -722,38 +725,39 @@ static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb,
722static int ocfs2_wipe_inode(struct inode *inode, 725static int ocfs2_wipe_inode(struct inode *inode,
723 struct buffer_head *di_bh) 726 struct buffer_head *di_bh)
724{ 727{
725 int status, orphaned_slot; 728 int status, orphaned_slot = -1;
726 struct inode *orphan_dir_inode = NULL; 729 struct inode *orphan_dir_inode = NULL;
727 struct buffer_head *orphan_dir_bh = NULL; 730 struct buffer_head *orphan_dir_bh = NULL;
728 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 731 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
729 struct ocfs2_dinode *di; 732 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
730 733
731 di = (struct ocfs2_dinode *) di_bh->b_data; 734 if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
732 orphaned_slot = le16_to_cpu(di->i_orphaned_slot); 735 orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
733 736
734 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); 737 status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
735 if (status) 738 if (status)
736 return status; 739 return status;
737 740
738 orphan_dir_inode = ocfs2_get_system_file_inode(osb, 741 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
739 ORPHAN_DIR_SYSTEM_INODE, 742 ORPHAN_DIR_SYSTEM_INODE,
740 orphaned_slot); 743 orphaned_slot);
741 if (!orphan_dir_inode) { 744 if (!orphan_dir_inode) {
742 status = -EEXIST; 745 status = -EEXIST;
743 mlog_errno(status); 746 mlog_errno(status);
744 goto bail; 747 goto bail;
745 } 748 }
746 749
747 /* Lock the orphan dir. The lock will be held for the entire 750 /* Lock the orphan dir. The lock will be held for the entire
748 * delete_inode operation. We do this now to avoid races with 751 * delete_inode operation. We do this now to avoid races with
749 * recovery completion on other nodes. */ 752 * recovery completion on other nodes. */
750 mutex_lock(&orphan_dir_inode->i_mutex); 753 mutex_lock(&orphan_dir_inode->i_mutex);
751 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1); 754 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
752 if (status < 0) { 755 if (status < 0) {
753 mutex_unlock(&orphan_dir_inode->i_mutex); 756 mutex_unlock(&orphan_dir_inode->i_mutex);
754 757
755 mlog_errno(status); 758 mlog_errno(status);
756 goto bail; 759 goto bail;
760 }
757 } 761 }
758 762
759 /* we do this while holding the orphan dir lock because we 763 /* we do this while holding the orphan dir lock because we
@@ -794,6 +798,9 @@ static int ocfs2_wipe_inode(struct inode *inode,
794 mlog_errno(status); 798 mlog_errno(status);
795 799
796bail_unlock_dir: 800bail_unlock_dir:
801 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)
802 return status;
803
797 ocfs2_inode_unlock(orphan_dir_inode, 1); 804 ocfs2_inode_unlock(orphan_dir_inode, 1);
798 mutex_unlock(&orphan_dir_inode->i_mutex); 805 mutex_unlock(&orphan_dir_inode->i_mutex);
799 brelse(orphan_dir_bh); 806 brelse(orphan_dir_bh);
@@ -889,7 +896,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
889 896
890 /* Do some basic inode verification... */ 897 /* Do some basic inode verification... */
891 di = (struct ocfs2_dinode *) di_bh->b_data; 898 di = (struct ocfs2_dinode *) di_bh->b_data;
892 if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) { 899 if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) &&
900 !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
893 /* 901 /*
894 * Inodes in the orphan dir must have ORPHANED_FL. The only 902 * Inodes in the orphan dir must have ORPHANED_FL. The only
895 * inodes that come back out of the orphan dir are reflink 903 * inodes that come back out of the orphan dir are reflink
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ba4fe07b293c..0b28e1921a39 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -100,6 +100,8 @@ struct ocfs2_inode_info
100#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 100#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020
101/* Does someone have the file open O_DIRECT */ 101/* Does someone have the file open O_DIRECT */
102#define OCFS2_INODE_OPEN_DIRECT 0x00000040 102#define OCFS2_INODE_OPEN_DIRECT 0x00000040
103/* Tell the inode wipe code it's not in orphan dir */
104#define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000080
103 105
104static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) 106static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
105{ 107{
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b1eb50ae4097..4cbb18f26c5f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -408,23 +408,28 @@ static int ocfs2_mknod(struct inode *dir,
408 } 408 }
409 } 409 }
410 410
411 status = ocfs2_add_entry(handle, dentry, inode, 411 /*
412 OCFS2_I(inode)->ip_blkno, parent_fe_bh, 412 * Do this before adding the entry to the directory. We add
413 &lookup); 413 * also set d_op after success so that ->d_iput() will cleanup
414 if (status < 0) { 414 * the dentry lock even if ocfs2_add_entry() fails below.
415 */
416 status = ocfs2_dentry_attach_lock(dentry, inode,
417 OCFS2_I(dir)->ip_blkno);
418 if (status) {
415 mlog_errno(status); 419 mlog_errno(status);
416 goto leave; 420 goto leave;
417 } 421 }
422 dentry->d_op = &ocfs2_dentry_ops;
418 423
419 status = ocfs2_dentry_attach_lock(dentry, inode, 424 status = ocfs2_add_entry(handle, dentry, inode,
420 OCFS2_I(dir)->ip_blkno); 425 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
421 if (status) { 426 &lookup);
427 if (status < 0) {
422 mlog_errno(status); 428 mlog_errno(status);
423 goto leave; 429 goto leave;
424 } 430 }
425 431
426 insert_inode_hash(inode); 432 insert_inode_hash(inode);
427 dentry->d_op = &ocfs2_dentry_ops;
428 d_instantiate(dentry, inode); 433 d_instantiate(dentry, inode);
429 status = 0; 434 status = 0;
430leave: 435leave:
@@ -445,11 +450,6 @@ leave:
445 450
446 ocfs2_free_dir_lookup_result(&lookup); 451 ocfs2_free_dir_lookup_result(&lookup);
447 452
448 if ((status < 0) && inode) {
449 clear_nlink(inode);
450 iput(inode);
451 }
452
453 if (inode_ac) 453 if (inode_ac)
454 ocfs2_free_alloc_context(inode_ac); 454 ocfs2_free_alloc_context(inode_ac);
455 455
@@ -459,6 +459,17 @@ leave:
459 if (meta_ac) 459 if (meta_ac)
460 ocfs2_free_alloc_context(meta_ac); 460 ocfs2_free_alloc_context(meta_ac);
461 461
462 /*
463 * We should call iput after the i_mutex of the bitmap been
464 * unlocked in ocfs2_free_alloc_context, or the
465 * ocfs2_delete_inode will mutex_lock again.
466 */
467 if ((status < 0) && inode) {
468 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
469 clear_nlink(inode);
470 iput(inode);
471 }
472
462 mlog_exit(status); 473 mlog_exit(status);
463 474
464 return status; 475 return status;
@@ -1771,22 +1782,27 @@ static int ocfs2_symlink(struct inode *dir,
1771 } 1782 }
1772 } 1783 }
1773 1784
1774 status = ocfs2_add_entry(handle, dentry, inode, 1785 /*
1775 le64_to_cpu(fe->i_blkno), parent_fe_bh, 1786 * Do this before adding the entry to the directory. We add
1776 &lookup); 1787 * also set d_op after success so that ->d_iput() will cleanup
1777 if (status < 0) { 1788 * the dentry lock even if ocfs2_add_entry() fails below.
1789 */
1790 status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
1791 if (status) {
1778 mlog_errno(status); 1792 mlog_errno(status);
1779 goto bail; 1793 goto bail;
1780 } 1794 }
1795 dentry->d_op = &ocfs2_dentry_ops;
1781 1796
1782 status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno); 1797 status = ocfs2_add_entry(handle, dentry, inode,
1783 if (status) { 1798 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1799 &lookup);
1800 if (status < 0) {
1784 mlog_errno(status); 1801 mlog_errno(status);
1785 goto bail; 1802 goto bail;
1786 } 1803 }
1787 1804
1788 insert_inode_hash(inode); 1805 insert_inode_hash(inode);
1789 dentry->d_op = &ocfs2_dentry_ops;
1790 d_instantiate(dentry, inode); 1806 d_instantiate(dentry, inode);
1791bail: 1807bail:
1792 if (status < 0 && did_quota) 1808 if (status < 0 && did_quota)
@@ -1811,6 +1827,7 @@ bail:
1811 if (xattr_ac) 1827 if (xattr_ac)
1812 ocfs2_free_alloc_context(xattr_ac); 1828 ocfs2_free_alloc_context(xattr_ac);
1813 if ((status < 0) && inode) { 1829 if ((status < 0) && inode) {
1830 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
1814 clear_nlink(inode); 1831 clear_nlink(inode);
1815 iput(inode); 1832 iput(inode);
1816 } 1833 }
@@ -1976,6 +1993,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1976 } 1993 }
1977 1994
1978 le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL); 1995 le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
1996 OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
1979 1997
1980 /* Record which orphan dir our inode now resides 1998 /* Record which orphan dir our inode now resides
1981 * in. delete_inode will use this to determine which orphan 1999 * in. delete_inode will use this to determine which orphan
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index bd96f6c7877e..5cbcd0f008fc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4083,6 +4083,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
4083 di->i_attr = s_di->i_attr; 4083 di->i_attr = s_di->i_attr;
4084 4084
4085 if (preserve) { 4085 if (preserve) {
4086 t_inode->i_uid = s_inode->i_uid;
4087 t_inode->i_gid = s_inode->i_gid;
4088 t_inode->i_mode = s_inode->i_mode;
4086 di->i_uid = s_di->i_uid; 4089 di->i_uid = s_di->i_uid;
4087 di->i_gid = s_di->i_gid; 4090 di->i_gid = s_di->i_gid;
4088 di->i_mode = s_di->i_mode; 4091 di->i_mode = s_di->i_mode;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index e51f2ec2c5e5..885ab5513ac5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -81,7 +81,6 @@
81#include <linux/pid_namespace.h> 81#include <linux/pid_namespace.h>
82#include <linux/ptrace.h> 82#include <linux/ptrace.h>
83#include <linux/tracehook.h> 83#include <linux/tracehook.h>
84#include <linux/swapops.h>
85 84
86#include <asm/pgtable.h> 85#include <asm/pgtable.h>
87#include <asm/processor.h> 86#include <asm/processor.h>
@@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
495 rsslim, 494 rsslim,
496 mm ? mm->start_code : 0, 495 mm ? mm->start_code : 0,
497 mm ? mm->end_code : 0, 496 mm ? mm->end_code : 0,
498 (permitted && mm) ? task->stack_start : 0, 497 (permitted && mm) ? mm->start_stack : 0,
499 esp, 498 esp,
500 eip, 499 eip,
501 /* The signal information here is obsolete. 500 /* The signal information here is obsolete.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 070553427dd5..47f5b145f56e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
247 } else if (vma->vm_start <= mm->start_stack && 247 } else if (vma->vm_start <= mm->start_stack &&
248 vma->vm_end >= mm->start_stack) { 248 vma->vm_end >= mm->start_stack) {
249 name = "[stack]"; 249 name = "[stack]";
250 } else {
251 unsigned long stack_start;
252 struct proc_maps_private *pmp;
253
254 pmp = m->private;
255 stack_start = pmp->task->stack_start;
256
257 if (vma->vm_start <= stack_start &&
258 vma->vm_end >= stack_start) {
259 pad_len_spaces(m, len);
260 seq_printf(m,
261 "[threadstack:%08lx]",
262#ifdef CONFIG_STACK_GROWSUP
263 vma->vm_end - stack_start
264#else
265 stack_start - vma->vm_start
266#endif
267 );
268 }
269 } 250 }
270 } else { 251 } else {
271 name = "[vdso]"; 252 name = "[vdso]";
diff --git a/fs/super.c b/fs/super.c
index dc72491a19f9..1527e6a0ee35 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,6 +37,7 @@
37#include <linux/kobject.h> 37#include <linux/kobject.h>
38#include <linux/mutex.h> 38#include <linux/mutex.h>
39#include <linux/file.h> 39#include <linux/file.h>
40#include <linux/backing-dev.h>
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41#include "internal.h" 42#include "internal.h"
42 43
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 4e50286a4cc3..1dabed286b4c 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -164,8 +164,8 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
164 name, de->name)) 164 name, de->name))
165 goto found; 165 goto found;
166 } 166 }
167 dir_put_page(page);
167 } 168 }
168 dir_put_page(page);
169 169
170 if (++n >= npages) 170 if (++n >= npages)
171 n = 0; 171 n = 0;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 52e06b487ced..29f1edca76de 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1209,6 +1209,7 @@ xfs_fs_put_super(
1209 1209
1210 xfs_unmountfs(mp); 1210 xfs_unmountfs(mp);
1211 xfs_freesb(mp); 1211 xfs_freesb(mp);
1212 xfs_inode_shrinker_unregister(mp);
1212 xfs_icsb_destroy_counters(mp); 1213 xfs_icsb_destroy_counters(mp);
1213 xfs_close_devices(mp); 1214 xfs_close_devices(mp);
1214 xfs_dmops_put(mp); 1215 xfs_dmops_put(mp);
@@ -1622,6 +1623,8 @@ xfs_fs_fill_super(
1622 if (error) 1623 if (error)
1623 goto fail_vnrele; 1624 goto fail_vnrele;
1624 1625
1626 xfs_inode_shrinker_register(mp);
1627
1625 kfree(mtpt); 1628 kfree(mtpt);
1626 return 0; 1629 return 0;
1627 1630
@@ -1867,6 +1870,7 @@ init_xfs_fs(void)
1867 goto out_cleanup_procfs; 1870 goto out_cleanup_procfs;
1868 1871
1869 vfs_initquota(); 1872 vfs_initquota();
1873 xfs_inode_shrinker_init();
1870 1874
1871 error = register_filesystem(&xfs_fs_type); 1875 error = register_filesystem(&xfs_fs_type);
1872 if (error) 1876 if (error)
@@ -1894,6 +1898,7 @@ exit_xfs_fs(void)
1894{ 1898{
1895 vfs_exitquota(); 1899 vfs_exitquota();
1896 unregister_filesystem(&xfs_fs_type); 1900 unregister_filesystem(&xfs_fs_type);
1901 xfs_inode_shrinker_destroy();
1897 xfs_sysctl_unregister(); 1902 xfs_sysctl_unregister();
1898 xfs_cleanup_procfs(); 1903 xfs_cleanup_procfs();
1899 xfs_buf_terminate(); 1904 xfs_buf_terminate();
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index fd9698215759..a427c638d909 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
95 struct xfs_perag *pag, int flags), 95 struct xfs_perag *pag, int flags),
96 int flags, 96 int flags,
97 int tag, 97 int tag,
98 int exclusive) 98 int exclusive,
99 int *nr_to_scan)
99{ 100{
100 uint32_t first_index; 101 uint32_t first_index;
101 int last_error = 0; 102 int last_error = 0;
@@ -134,7 +135,7 @@ restart:
134 if (error == EFSCORRUPTED) 135 if (error == EFSCORRUPTED)
135 break; 136 break;
136 137
137 } while (1); 138 } while ((*nr_to_scan)--);
138 139
139 if (skipped) { 140 if (skipped) {
140 delay(1); 141 delay(1);
@@ -150,12 +151,15 @@ xfs_inode_ag_iterator(
150 struct xfs_perag *pag, int flags), 151 struct xfs_perag *pag, int flags),
151 int flags, 152 int flags,
152 int tag, 153 int tag,
153 int exclusive) 154 int exclusive,
155 int *nr_to_scan)
154{ 156{
155 int error = 0; 157 int error = 0;
156 int last_error = 0; 158 int last_error = 0;
157 xfs_agnumber_t ag; 159 xfs_agnumber_t ag;
160 int nr;
158 161
162 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
159 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { 163 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
160 struct xfs_perag *pag; 164 struct xfs_perag *pag;
161 165
@@ -165,14 +169,18 @@ xfs_inode_ag_iterator(
165 continue; 169 continue;
166 } 170 }
167 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 171 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
168 exclusive); 172 exclusive, &nr);
169 xfs_perag_put(pag); 173 xfs_perag_put(pag);
170 if (error) { 174 if (error) {
171 last_error = error; 175 last_error = error;
172 if (error == EFSCORRUPTED) 176 if (error == EFSCORRUPTED)
173 break; 177 break;
174 } 178 }
179 if (nr <= 0)
180 break;
175 } 181 }
182 if (nr_to_scan)
183 *nr_to_scan = nr;
176 return XFS_ERROR(last_error); 184 return XFS_ERROR(last_error);
177} 185}
178 186
@@ -291,7 +299,7 @@ xfs_sync_data(
291 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 299 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
292 300
293 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 301 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
294 XFS_ICI_NO_TAG, 0); 302 XFS_ICI_NO_TAG, 0, NULL);
295 if (error) 303 if (error)
296 return XFS_ERROR(error); 304 return XFS_ERROR(error);
297 305
@@ -310,7 +318,7 @@ xfs_sync_attr(
310 ASSERT((flags & ~SYNC_WAIT) == 0); 318 ASSERT((flags & ~SYNC_WAIT) == 0);
311 319
312 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 320 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
313 XFS_ICI_NO_TAG, 0); 321 XFS_ICI_NO_TAG, 0, NULL);
314} 322}
315 323
316STATIC int 324STATIC int
@@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag(
673 radix_tree_tag_set(&pag->pag_ici_root, 681 radix_tree_tag_set(&pag->pag_ici_root,
674 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), 682 XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
675 XFS_ICI_RECLAIM_TAG); 683 XFS_ICI_RECLAIM_TAG);
684 pag->pag_ici_reclaimable++;
676} 685}
677 686
678/* 687/*
@@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag(
705{ 714{
706 radix_tree_tag_clear(&pag->pag_ici_root, 715 radix_tree_tag_clear(&pag->pag_ici_root,
707 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); 716 XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
717 pag->pag_ici_reclaimable--;
708} 718}
709 719
710/* 720/*
@@ -854,5 +864,93 @@ xfs_reclaim_inodes(
854 int mode) 864 int mode)
855{ 865{
856 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, 866 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
857 XFS_ICI_RECLAIM_TAG, 1); 867 XFS_ICI_RECLAIM_TAG, 1, NULL);
868}
869
870/*
871 * Shrinker infrastructure.
872 *
873 * This is all far more complex than it needs to be. It adds a global list of
874 * mounts because the shrinkers can only call a global context. We need to make
875 * the shrinkers pass a context to avoid the need for global state.
876 */
877static LIST_HEAD(xfs_mount_list);
878static struct rw_semaphore xfs_mount_list_lock;
879
880static int
881xfs_reclaim_inode_shrink(
882 int nr_to_scan,
883 gfp_t gfp_mask)
884{
885 struct xfs_mount *mp;
886 struct xfs_perag *pag;
887 xfs_agnumber_t ag;
888 int reclaimable = 0;
889
890 if (nr_to_scan) {
891 if (!(gfp_mask & __GFP_FS))
892 return -1;
893
894 down_read(&xfs_mount_list_lock);
895 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
896 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
897 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
898 if (nr_to_scan <= 0)
899 break;
900 }
901 up_read(&xfs_mount_list_lock);
902 }
903
904 down_read(&xfs_mount_list_lock);
905 list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
906 for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
907
908 pag = xfs_perag_get(mp, ag);
909 if (!pag->pag_ici_init) {
910 xfs_perag_put(pag);
911 continue;
912 }
913 reclaimable += pag->pag_ici_reclaimable;
914 xfs_perag_put(pag);
915 }
916 }
917 up_read(&xfs_mount_list_lock);
918 return reclaimable;
919}
920
921static struct shrinker xfs_inode_shrinker = {
922 .shrink = xfs_reclaim_inode_shrink,
923 .seeks = DEFAULT_SEEKS,
924};
925
926void __init
927xfs_inode_shrinker_init(void)
928{
929 init_rwsem(&xfs_mount_list_lock);
930 register_shrinker(&xfs_inode_shrinker);
931}
932
933void
934xfs_inode_shrinker_destroy(void)
935{
936 ASSERT(list_empty(&xfs_mount_list));
937 unregister_shrinker(&xfs_inode_shrinker);
938}
939
940void
941xfs_inode_shrinker_register(
942 struct xfs_mount *mp)
943{
944 down_write(&xfs_mount_list_lock);
945 list_add_tail(&mp->m_mplist, &xfs_mount_list);
946 up_write(&xfs_mount_list_lock);
947}
948
949void
950xfs_inode_shrinker_unregister(
951 struct xfs_mount *mp)
952{
953 down_write(&xfs_mount_list_lock);
954 list_del(&mp->m_mplist);
955 up_write(&xfs_mount_list_lock);
858} 956}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index d480c346cabb..cdcbaaca9880 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 53int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
54int xfs_inode_ag_iterator(struct xfs_mount *mp, 54int xfs_inode_ag_iterator(struct xfs_mount *mp,
55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 55 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
56 int flags, int tag, int write_lock); 56 int flags, int tag, int write_lock, int *nr_to_scan);
57
58void xfs_inode_shrinker_init(void);
59void xfs_inode_shrinker_destroy(void);
60void xfs_inode_shrinker_register(struct xfs_mount *mp);
61void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
57 62
58#endif 63#endif
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 5d0ee8d492db..50bee07d6b0e 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes(
891 uint flags) 891 uint flags)
892{ 892{
893 ASSERT(mp->m_quotainfo); 893 ASSERT(mp->m_quotainfo);
894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0); 894 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
895 XFS_ICI_NO_TAG, 0, NULL);
895} 896}
896 897
897/*------------------------------------------------------------------------*/ 898/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index b1a5a1ff88ea..abb8222b88c9 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -223,6 +223,7 @@ typedef struct xfs_perag {
223 int pag_ici_init; /* incore inode cache initialised */ 223 int pag_ici_init; /* incore inode cache initialised */
224 rwlock_t pag_ici_lock; /* incore inode lock */ 224 rwlock_t pag_ici_lock; /* incore inode lock */
225 struct radix_tree_root pag_ici_root; /* incore inode cache root */ 225 struct radix_tree_root pag_ici_root; /* incore inode cache root */
226 int pag_ici_reclaimable; /* reclaimable inodes */
226#endif 227#endif
227 int pagb_count; /* pagb slots in use */ 228 int pagb_count; /* pagb slots in use */
228 xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */ 229 xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4fa0bc7b983e..9ff48a16a7ee 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -259,6 +259,7 @@ typedef struct xfs_mount {
259 wait_queue_head_t m_wait_single_sync_task; 259 wait_queue_head_t m_wait_single_sync_task;
260 __int64_t m_update_flags; /* sb flags we need to update 260 __int64_t m_update_flags; /* sb flags we need to update
261 on the next remount,rw */ 261 on the next remount,rw */
262 struct list_head m_mplist; /* inode shrinker mount list */
262} xfs_mount_t; 263} xfs_mount_t;
263 264
264/* 265/*