Merge branch 'for-2.6.34-incoming' into for-2.6.35-incoming

author: J. Bruce Fields <bfields@citi.umich.edu> 2010-06-08 20:05:18 -0400
committer: J. Bruce Fields <bfields@citi.umich.edu> 2010-06-08 20:05:18 -0400
commit: 44b56603c4c476b845a824cff6fe905c6268b2a1 (patch)
tree: b7e792414fef2390718a657765719fbbb529ce84 /fs
parent: c3935e30495869dd611e1cd62253c94ebc7c6c04 (diff)
parent: b160fdabe93a8a53094f90f02bf4dcb500782aab (diff)
50 files changed, 518 insertions, 276 deletions
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 109a6c606d92..e8e5e63ac950 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -177,8 +177,7 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
                }
        /* Trigger mount for path component or follow link */
        } else if (ino->flags & AUTOFS_INF_PENDING ||
-                        autofs4_need_mount(flags) ||
+                        autofs4_need_mount(flags)) {
-                        current->link_count) {
                DPRINTK("waiting for mount name=%.*s",
                        dentry->d_name.len, dentry->d_name.name);
@@ -262,7 +261,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
                spin_unlock(&dcache_lock);
                spin_unlock(&sbi->fs_lock);
-                status = try_to_fill_dentry(dentry, 0);
+                status = try_to_fill_dentry(dentry, nd->flags);
                if (status)
                        goto out_error;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e84ef60ffe35..97a97839a867 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1481,12 +1481,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
                ret = -EBADF;
                goto out_drop_write;
        }
        src = src_file->f_dentry->d_inode;
        ret = -EINVAL;
        if (src == inode)
                goto out_fput;
+        /* the src must be open for reading */
+        if (!(src_file->f_mode & FMODE_READ))
+                goto out_fput;
        ret = -EISDIR;
        if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
                goto out_fput;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index f7c255f9c624..a8cd821226da 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -34,6 +34,7 @@ struct cachefiles_object {
        loff_t                          i_size;         /* object size */
        unsigned long                   flags;
 #define CACHEFILES_OBJECT_ACTIVE        0               /* T if marked active */
+#define CACHEFILES_OBJECT_BURIED        1               /* T if preemptively buried */
        atomic_t                        usage;          /* object usage count */
        uint8_t                         type;           /* object type */
        uint8_t                         new;            /* T if object new */
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index d5db84a1ee0d..f4a7840bf42c 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -93,6 +93,59 @@ static noinline void cachefiles_printk_object(struct cachefiles_object *object,
 }
 /*
+ * mark the owner of a dentry, if there is one, to indicate that that dentry
+ * has been preemptively deleted
+ * - the caller must hold the i_mutex on the dentry's parent as required to
+ *   call vfs_unlink(), vfs_rmdir() or vfs_rename()
+ * - takes cache->active_lock for writing and searches cache->active_nodes,
+ *   which is ordered by dentry pointer value, for the object whose dentry is
+ *   the one given
+ * - if no object owns the dentry, nothing is changed
+ * - if the owning object is in a state earlier than FSCACHE_OBJECT_DYING, an
+ *   error is logged and the object is left unmarked; otherwise
+ *   CACHEFILES_OBJECT_BURIED is set in its flags, with an error logged if
+ *   that bit was already set
+ */
+static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
+                                          struct dentry *dentry)
+{
+        struct cachefiles_object *object;
+        struct rb_node *p;
+        _enter(",'%*.*s'",
+               dentry->d_name.len, dentry->d_name.len, dentry->d_name.name);
+        write_lock(&cache->active_lock);
+        p = cache->active_nodes.rb_node;
+        while (p) {
+                object = rb_entry(p, struct cachefiles_object, active_node);
+                if (object->dentry > dentry)
+                        p = p->rb_left;
+                else if (object->dentry < dentry)
+                        p = p->rb_right;
+                else
+                        goto found_dentry;
+        }
+        write_unlock(&cache->active_lock);
+        _leave(" [no owner]");
+        return;
+        /* found the active object that owns this dentry */
+found_dentry:
+        kdebug("preemptive burial: OBJ%x [%s] %p",
+               object->fscache.debug_id,
+               fscache_object_states[object->fscache.state],
+               dentry);
+        if (object->fscache.state < FSCACHE_OBJECT_DYING) {
+                printk(KERN_ERR "\n");
+                printk(KERN_ERR "CacheFiles: Error:"
+                       " Can't preemptively bury live object\n");
+                cachefiles_printk_object(object, NULL);
+        } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
+                printk(KERN_ERR "CacheFiles: Error:"
+                       " Object already preemptively buried\n");
+        }
+        write_unlock(&cache->active_lock);
+        _leave(" [owner marked]");
+}
+/*
 * record the fact that an object is now active
 */
 static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
@@ -219,7 +272,8 @@ requeue:
 */
 static int cachefiles_bury_object(struct cachefiles_cache *cache,
                                  struct dentry *dir,
-                                  struct dentry *rep)
+                                  struct dentry *rep,
+                                  bool preemptive)
 {
        struct dentry *grave, *trap;
        char nbuffer[8 + 8 + 1];
@@ -229,11 +283,16 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
               dir->d_name.len, dir->d_name.len, dir->d_name.name,
               rep->d_name.len, rep->d_name.len, rep->d_name.name);
+        _debug("remove %p from %p", rep, dir);
        /* non-directories can just be unlinked */
        if (!S_ISDIR(rep->d_inode->i_mode)) {
                _debug("unlink stale object");
                ret = vfs_unlink(dir->d_inode, rep);
+                if (preemptive)
+                        cachefiles_mark_object_buried(cache, rep);
                mutex_unlock(&dir->d_inode->i_mutex);
                if (ret == -EIO)
@@ -325,6 +384,9 @@ try_again:
        if (ret != 0 && ret != -ENOMEM)
                cachefiles_io_error(cache, "Rename failed with error %d", ret);
+        if (preemptive)
+                cachefiles_mark_object_buried(cache, rep);
        unlock_rename(cache->graveyard, dir);
        dput(grave);
        _leave(" = 0");
@@ -340,7 +402,7 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
        struct dentry *dir;
        int ret;
-        _enter(",{%p}", object->dentry);
+        _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
        ASSERT(object->dentry);
        ASSERT(object->dentry->d_inode);
@@ -350,15 +412,25 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
        mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
-        /* we need to check that our parent is _still_ our parent - it may have
+        if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
-         * been renamed */
+                /* object allocation for the same key preemptively deleted this
-        if (dir == object->dentry->d_parent) {
+                 * object's file so that it could create its own file */
-                ret = cachefiles_bury_object(cache, dir, object->dentry);
+                _debug("object preemptively buried");
-        } else {
-                /* it got moved, presumably by cachefilesd culling it, so it's
-                 * no longer in the key path and we can ignore it */
                mutex_unlock(&dir->d_inode->i_mutex);
                ret = 0;
+        } else {
+                /* we need to check that our parent is _still_ our parent - it
+                 * may have been renamed */
+                if (dir == object->dentry->d_parent) {
+                        ret = cachefiles_bury_object(cache, dir,
+                                                     object->dentry, false);
+                } else {
+                        /* it got moved, presumably by cachefilesd culling it,
+                         * so it's no longer in the key path and we can ignore
+                         * it */
+                        mutex_unlock(&dir->d_inode->i_mutex);
+                        ret = 0;
+                }
        }
        dput(dir);
@@ -381,7 +453,9 @@ int cachefiles_walk_to_object(struct cachefiles_object *parent,
        const char *name;
        int ret, nlen;
-        _enter("{%p},,%s,", parent->dentry, key);
+        _enter("OBJ%x{%p},OBJ%x,%s,",
+               parent->fscache.debug_id, parent->dentry,
+               object->fscache.debug_id, key);
        cache = container_of(parent->fscache.cache,
                             struct cachefiles_cache, cache);
@@ -509,7 +583,7 @@ lookup_again:
                         * mutex) */
                        object->dentry = NULL;
-                        ret = cachefiles_bury_object(cache, dir, next);
+                        ret = cachefiles_bury_object(cache, dir, next, true);
                        dput(next);
                        next = NULL;
@@ -828,7 +902,7 @@ int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
        /*  actually remove the victim (drops the dir mutex) */
        _debug("bury");
-        ret = cachefiles_bury_object(cache, dir, victim);
+        ret = cachefiles_bury_object(cache, dir, victim, false);
        if (ret < 0)
                goto error;
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index b5808cdb2232..039b5011d83b 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -77,6 +77,8 @@ static int cachefiles_check_cache_dir(struct cachefiles_cache *cache,
 /*
 * check the security details of the on-disk cache
 * - must be called with security override in force
+ * - must return with a security override in force - even in the case of an
+ *   error
 */
 int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
                                        struct dentry *root,
@@ -99,6 +101,8 @@ int cachefiles_determine_cache_security(struct cachefiles_cache *cache,
         * which create files */
        ret = set_create_files_as(new, root->d_inode);
        if (ret < 0) {
+                abort_creds(new);
+                cachefiles_begin_secure(cache, _saved_cred);
                _leave(" = %d [cfa]", ret);
                return ret;
        }
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 412593703d1e..a9005d862ed4 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -504,12 +504,11 @@ static void writepages_finish(struct ceph_osd_request *req,
        int i;
        struct ceph_snap_context *snapc = req->r_snapc;
        struct address_space *mapping = inode->i_mapping;
-        struct writeback_control *wbc = req->r_wbc;
        __s32 rc = -EIO;
        u64 bytes = 0;
        struct ceph_client *client = ceph_inode_to_client(inode);
        long writeback_stat;
-        unsigned issued = __ceph_caps_issued(ci, NULL);
+        unsigned issued = ceph_caps_issued(ci);
        /* parse reply */
        replyhead = msg->front.iov_base;
@@ -546,10 +545,6 @@ static void writepages_finish(struct ceph_osd_request *req,
                        clear_bdi_congested(&client->backing_dev_info,
                                            BLK_RW_ASYNC);
-                if (i >= wrote) {
-                        dout("inode %p skipping page %p\n", inode, page);
-                        wbc->pages_skipped++;
-                }
                ceph_put_snap_context((void *)page->private);
                page->private = 0;
                ClearPagePrivate(page);
@@ -799,7 +794,6 @@ get_more_pages:
                                alloc_page_vec(client, req);
                                req->r_callback = writepages_finish;
                                req->r_inode = inode;
-                                req->r_wbc = wbc;
                        }
                        /* note position of first page in pvec */
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c
index f6394b94b866..818afe72e6c7 100644
--- a/fs/ceph/auth.c
+++ b/fs/ceph/auth.c
@@ -3,6 +3,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/slab.h>
 #include "types.h"
 #include "auth_none.h"
diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h
index 56c05533a31c..8164df1a08be 100644
--- a/fs/ceph/auth_none.h
+++ b/fs/ceph/auth_none.h
@@ -1,6 +1,8 @@
 #ifndef _FS_CEPH_AUTH_NONE_H
 #define _FS_CEPH_AUTH_NONE_H
+#include <linux/slab.h>
 #include "auth.h"
 /*
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c
index d9001a4dc8cc..fee5a08da881 100644
--- a/fs/ceph/auth_x.c
+++ b/fs/ceph/auth_x.c
@@ -12,8 +12,6 @@
 #include "auth.h"
 #include "decode.h"
-struct kmem_cache *ceph_x_ticketbuf_cachep;
 #define TEMP_TICKET_BUF_LEN     256
 static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
@@ -131,13 +129,12 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
        char *ticket_buf;
        u8 struct_v;
-        dbuf = kmem_cache_alloc(ceph_x_ticketbuf_cachep, GFP_NOFS | GFP_ATOMIC);
+        dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
        if (!dbuf)
                return -ENOMEM;
        ret = -ENOMEM;
-        ticket_buf = kmem_cache_alloc(ceph_x_ticketbuf_cachep,
+        ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS);
-                                      GFP_NOFS | GFP_ATOMIC);
        if (!ticket_buf)
                goto out_dbuf;
@@ -251,9 +248,9 @@ static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
        ret = 0;
 out:
-        kmem_cache_free(ceph_x_ticketbuf_cachep, ticket_buf);
+        kfree(ticket_buf);
 out_dbuf:
-        kmem_cache_free(ceph_x_ticketbuf_cachep, dbuf);
+        kfree(dbuf);
        return ret;
 bad:
@@ -605,8 +602,6 @@ static void ceph_x_destroy(struct ceph_auth_client *ac)
                remove_ticket_handler(ac, th);
        }
-        kmem_cache_destroy(ceph_x_ticketbuf_cachep);
        kfree(ac->private);
        ac->private = NULL;
 }
@@ -641,26 +636,20 @@ int ceph_x_init(struct ceph_auth_client *ac)
        int ret;
        dout("ceph_x_init %p\n", ac);
+        ret = -ENOMEM;
        xi = kzalloc(sizeof(*xi), GFP_NOFS);
        if (!xi)
-                return -ENOMEM;
+                goto out;
-        ret = -ENOMEM;
-        ceph_x_ticketbuf_cachep = kmem_cache_create("ceph_x_ticketbuf",
-                                      TEMP_TICKET_BUF_LEN, 8,
-                                      (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
-                                      NULL);
-        if (!ceph_x_ticketbuf_cachep)
-                goto done_nomem;
        ret = -EINVAL;
        if (!ac->secret) {
                pr_err("no secret set (for auth_x protocol)\n");
-                goto done_nomem;
+                goto out_nomem;
        }
        ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret);
        if (ret)
-                goto done_nomem;
+                goto out_nomem;
        xi->starting = true;
        xi->ticket_handlers = RB_ROOT;
@@ -670,10 +659,9 @@ int ceph_x_init(struct ceph_auth_client *ac)
        ac->ops = &ceph_x_ops;
        return 0;
-done_nomem:
+out_nomem:
        kfree(xi);
-        if (ceph_x_ticketbuf_cachep)
+out:
-                kmem_cache_destroy(ceph_x_ticketbuf_cachep);
        return ret;
 }
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index aa2239fa9a3b..d9400534b279 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -858,6 +858,8 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
 }
 /*
+ * Remove a cap.  Take steps to deal with a racing iterate_session_caps.
+ *
 * caller should hold i_lock.
 * caller will not hold session s_mutex if called from destroy_inode.
 */
@@ -866,15 +868,10 @@ void __ceph_remove_cap(struct ceph_cap *cap)
        struct ceph_mds_session *session = cap->session;
        struct ceph_inode_info *ci = cap->ci;
        struct ceph_mds_client *mdsc = &ceph_client(ci->vfs_inode.i_sb)->mdsc;
+        int removed = 0;
        dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
-        /* remove from inode list */
-        rb_erase(&cap->ci_node, &ci->i_caps);
-        cap->ci = NULL;
-        if (ci->i_auth_cap == cap)
-                ci->i_auth_cap = NULL;
        /* remove from session list */
        spin_lock(&session->s_cap_lock);
        if (session->s_cap_iterator == cap) {
@@ -885,10 +882,18 @@ void __ceph_remove_cap(struct ceph_cap *cap)
                list_del_init(&cap->session_caps);
                session->s_nr_caps--;
                cap->session = NULL;
+                removed = 1;
        }
+        /* protect backpointer with s_cap_lock: see iterate_session_caps */
+        cap->ci = NULL;
        spin_unlock(&session->s_cap_lock);
-        if (cap->session == NULL)
+        /* remove from inode list */
+        rb_erase(&cap->ci_node, &ci->i_caps);
+        if (ci->i_auth_cap == cap)
+                ci->i_auth_cap = NULL;
+        if (removed)
                ceph_put_cap(cap);
        if (!__ceph_is_any_caps(ci) && ci->i_snap_realm) {
@@ -1861,8 +1866,8 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
                } else {
                        pr_err("%p auth cap %p not mds%d ???\n", inode,
                               cap, session->s_mds);
-                        spin_unlock(&inode->i_lock);
                }
+                spin_unlock(&inode->i_lock);
        }
 }
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ea8ee2e526aa..650d2db5ed26 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -880,7 +880,16 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
                 * do_request, above).  If there is no trace, we need
                 * to do it here.
                 */
+                /* d_move screws up d_subdirs order */
+                ceph_i_clear(new_dir, CEPH_I_COMPLETE);
                d_move(old_dentry, new_dentry);
+                /* ensure target dentry is invalidated, despite
+                   rehashing bug in vfs_rename_dir */
+                new_dentry->d_time = jiffies;
+                ceph_dentry(new_dentry)->lease_shared_gen = 0;
        }
        ceph_mdsc_put_request(req);
        return err;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4add3d5da2c1..ed6f19721d6e 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -665,7 +665,8 @@ more:
                 * throw out any page cache pages in this range. this
                 * may block.
                 */
-                truncate_inode_pages_range(inode->i_mapping, pos, pos+len);
+                truncate_inode_pages_range(inode->i_mapping, pos, 
+                                           (pos+len) | (PAGE_CACHE_SIZE-1));
        } else {
                pages = alloc_page_vector(num_pages);
                if (IS_ERR(pages)) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 26f883c275e8..85b4d2ffdeba 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -733,6 +733,10 @@ no_change:
                                __ceph_get_fmode(ci, cap_fmode);
                        spin_unlock(&inode->i_lock);
                }
+        } else if (cap_fmode >= 0) {
+                pr_warning("mds issued no caps on %llx.%llx\n",
+                           ceph_vinop(inode));
+                __ceph_get_fmode(ci, cap_fmode);
        }
        /* update delegation info? */
@@ -997,6 +1001,10 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                             dn, dn->d_name.len, dn->d_name.name);
                        dout("fill_trace doing d_move %p -> %p\n",
                             req->r_old_dentry, dn);
+                        /* d_move screws up d_subdirs order */
+                        ceph_i_clear(dir, CEPH_I_COMPLETE);
                        d_move(req->r_old_dentry, dn);
                        dout(" src %p '%.*s' dst %p '%.*s'\n",
                             req->r_old_dentry,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 60a9a4ae47be..24561a557e01 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -736,9 +736,10 @@ static void cleanup_cap_releases(struct ceph_mds_session *session)
 }
 /*
- * Helper to safely iterate over all caps associated with a session.
+ * Helper to safely iterate over all caps associated with a session, with
+ * special care taken to handle a racing __ceph_remove_cap().
 *
- * caller must hold session s_mutex
+ * Caller must hold session s_mutex.
 */
 static int iterate_session_caps(struct ceph_mds_session *session,
                                 int (*cb)(struct inode *, struct ceph_cap *,
@@ -2136,7 +2137,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
        struct ceph_mds_session *session = NULL;
        struct ceph_msg *reply;
        struct rb_node *p;
-        int err;
+        int err = -ENOMEM;
        struct ceph_pagelist *pagelist;
        pr_info("reconnect to recovering mds%d\n", mds);
@@ -2185,7 +2186,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
                goto fail;
        err = iterate_session_caps(session, encode_caps_cb, pagelist);
        if (err < 0)
-                goto out;
+                goto fail;
        /*
         * snaprealms.  we provide mds with the ino, seq (version), and
@@ -2213,28 +2214,31 @@ send:
        reply->nr_pages = calc_pages_for(0, pagelist->length);
        ceph_con_send(&session->s_con, reply);
-        if (session) {
+        session->s_state = CEPH_MDS_SESSION_OPEN;
-                session->s_state = CEPH_MDS_SESSION_OPEN;
+        mutex_unlock(&session->s_mutex);
-                __wake_requests(mdsc, &session->s_waiting);
-        }
+        mutex_lock(&mdsc->mutex);
+        __wake_requests(mdsc, &session->s_waiting);
+        mutex_unlock(&mdsc->mutex);
+        ceph_put_mds_session(session);
-out:
        up_read(&mdsc->snap_rwsem);
-        if (session) {
-                mutex_unlock(&session->s_mutex);
-                ceph_put_mds_session(session);
-        }
        mutex_lock(&mdsc->mutex);
        return;
 fail:
        ceph_msg_put(reply);
+        up_read(&mdsc->snap_rwsem);
+        mutex_unlock(&session->s_mutex);
+        ceph_put_mds_session(session);
 fail_nomsg:
        ceph_pagelist_release(pagelist);
        kfree(pagelist);
 fail_nopagelist:
-        pr_err("ENOMEM preparing reconnect for mds%d\n", mds);
+        pr_err("error %d preparing reconnect for mds%d\n", err, mds);
-        goto out;
+        mutex_lock(&mdsc->mutex);
+        return;
 }
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index cdaaa131add3..cd4fadb6491a 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -492,7 +492,14 @@ static void prepare_write_message(struct ceph_connection *con)
                list_move_tail(&m->list_head, &con->out_sent);
        }
-        m->hdr.seq = cpu_to_le64(++con->out_seq);
+        /*
+         * only assign outgoing seq # if we haven't sent this message
+         * yet.  if it is requeued, resend with its original seq.
+         */
+        if (m->needs_out_seq) {
+                m->hdr.seq = cpu_to_le64(++con->out_seq);
+                m->needs_out_seq = false;
+        }
        dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n",
             m, con->out_seq, le16_to_cpu(m->hdr.type),
@@ -1334,6 +1341,7 @@ static int read_partial_message(struct ceph_connection *con)
        unsigned front_len, middle_len, data_len, data_off;
        int datacrc = con->msgr->nocrc;
        int skip;
+        u64 seq;
        dout("read_partial_message con %p msg %p\n", con, m);
@@ -1368,6 +1376,25 @@ static int read_partial_message(struct ceph_connection *con)
                return -EIO;
        data_off = le16_to_cpu(con->in_hdr.data_off);
+        /* verify seq# */
+        seq = le64_to_cpu(con->in_hdr.seq);
+        if ((s64)seq - (s64)con->in_seq < 1) {
+                pr_info("skipping %s%lld %s seq %lld, expected %lld\n",
+                        ENTITY_NAME(con->peer_name),
+                        pr_addr(&con->peer_addr.in_addr),
+                        seq, con->in_seq + 1);
+                con->in_base_pos = -front_len - middle_len - data_len -
+                        sizeof(m->footer);
+                con->in_tag = CEPH_MSGR_TAG_READY;
+                con->in_seq++;
+                return 0;
+        } else if ((s64)seq - (s64)con->in_seq > 1) {
+                pr_err("read_partial_message bad seq %lld expected %lld\n",
+                       seq, con->in_seq + 1);
+                con->error_msg = "bad message sequence # for incoming message";
+                return -EBADMSG;
+        }
        /* allocate message? */
        if (!con->in_msg) {
                dout("got hdr type %d front %d data %d\n", con->in_hdr.type,
@@ -1379,6 +1406,7 @@ static int read_partial_message(struct ceph_connection *con)
                        con->in_base_pos = -front_len - middle_len - data_len -
                                sizeof(m->footer);
                        con->in_tag = CEPH_MSGR_TAG_READY;
+                        con->in_seq++;
                        return 0;
                }
                if (IS_ERR(con->in_msg)) {
@@ -1965,6 +1993,8 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg)
        BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len));
+        msg->needs_out_seq = true;
        /* queue */
        mutex_lock(&con->mutex);
        BUG_ON(!list_empty(&msg->list_head));
@@ -2030,6 +2060,7 @@ void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg)
                ceph_msg_put(con->in_msg);
                con->in_msg = NULL;
                con->in_tag = CEPH_MSGR_TAG_READY;
+                con->in_seq++;
        } else {
                dout("con_revoke_pages %p msg %p pages %p no-op\n",
                     con, con->in_msg, msg);
@@ -2063,15 +2094,19 @@ struct ceph_msg *ceph_msg_new(int type, int front_len,
        kref_init(&m->kref);
        INIT_LIST_HEAD(&m->list_head);
+        m->hdr.tid = 0;
        m->hdr.type = cpu_to_le16(type);
+        m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+        m->hdr.version = 0;
        m->hdr.front_len = cpu_to_le32(front_len);
        m->hdr.middle_len = 0;
        m->hdr.data_len = cpu_to_le32(page_len);
        m->hdr.data_off = cpu_to_le16(page_off);
-        m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT);
+        m->hdr.reserved = 0;
        m->footer.front_crc = 0;
        m->footer.middle_crc = 0;
        m->footer.data_crc = 0;
+        m->footer.flags = 0;
        m->front_max = front_len;
        m->front_is_vmalloc = false;
        m->more_to_follow = false;
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h
index a343dae73cdc..a5caf91cc971 100644
--- a/fs/ceph/messenger.h
+++ b/fs/ceph/messenger.h
@@ -86,6 +86,7 @@ struct ceph_msg {
        struct kref kref;
        bool front_is_vmalloc;
        bool more_to_follow;
+        bool needs_out_seq;
        int front_max;
        struct ceph_msgpool *pool;
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c
index c7b4dedaace6..3514f71ff85f 100644
--- a/fs/ceph/osd_client.c
+++ b/fs/ceph/osd_client.c
@@ -565,7 +565,8 @@ static int __map_osds(struct ceph_osd_client *osdc,
 {
        struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
        struct ceph_pg pgid;
-        int o = -1;
+        int acting[CEPH_PG_MAX_SIZE];
+        int o = -1, num = 0;
        int err;
        dout("map_osds %p tid %lld\n", req, req->r_tid);
@@ -576,10 +577,16 @@ static int __map_osds(struct ceph_osd_client *osdc,
        pgid = reqhead->layout.ol_pgid;
        req->r_pgid = pgid;
-        o = ceph_calc_pg_primary(osdc->osdmap, pgid);
+        err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
+        if (err > 0) {
+                o = acting[0];
+                num = err;
+        }
        if ((req->r_osd && req->r_osd->o_osd == o &&
-             req->r_sent >= req->r_osd->o_incarnation) ||
+             req->r_sent >= req->r_osd->o_incarnation &&
+             req->r_num_pg_osds == num &&
+             memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
            (req->r_osd == NULL && o == -1))
                return 0;  /* no change */
@@ -587,6 +594,10 @@ static int __map_osds(struct ceph_osd_client *osdc,
             req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
             req->r_osd ? req->r_osd->o_osd : -1);
+        /* record full pg acting set */
+        memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
+        req->r_num_pg_osds = num;
        if (req->r_osd) {
                __cancel_request(req);
                list_del_init(&req->r_osd_item);
@@ -612,7 +623,7 @@ static int __map_osds(struct ceph_osd_client *osdc,
                __remove_osd_from_lru(req->r_osd);
                list_add(&req->r_osd_item, &req->r_osd->o_requests);
        }
-        err = 1;   /* osd changed */
+        err = 1;   /* osd or pg changed */
 out:
        return err;
@@ -779,16 +790,18 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
        struct ceph_osd_request *req;
        u64 tid;
        int numops, object_len, flags;
+        s32 result;
        tid = le64_to_cpu(msg->hdr.tid);
        if (msg->front.iov_len < sizeof(*rhead))
                goto bad;
        numops = le32_to_cpu(rhead->num_ops);
        object_len = le32_to_cpu(rhead->object_len);
+        result = le32_to_cpu(rhead->result);
        if (msg->front.iov_len != sizeof(*rhead) + object_len +
            numops * sizeof(struct ceph_osd_op))
                goto bad;
-        dout("handle_reply %p tid %llu\n", msg, tid);
+        dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);
        /* lookup */
        mutex_lock(&osdc->request_mutex);
@@ -834,7 +847,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
        dout("handle_reply tid %llu flags %d\n", tid, flags);
        /* either this is a read, or we got the safe response */
-        if ((flags & CEPH_OSD_FLAG_ONDISK) ||
+        if (result < 0 ||
+            (flags & CEPH_OSD_FLAG_ONDISK) ||
            ((flags & CEPH_OSD_FLAG_WRITE) == 0))
                __unregister_request(osdc, req);
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h
index b0759911e7c3..ce776989ef6a 100644
--- a/fs/ceph/osd_client.h
+++ b/fs/ceph/osd_client.h
@@ -48,6 +48,8 @@ struct ceph_osd_request {
        struct list_head r_osd_item;
        struct ceph_osd *r_osd;
        struct ceph_pg   r_pgid;
+        int              r_pg_osds[CEPH_PG_MAX_SIZE];
+        int              r_num_pg_osds;
        struct ceph_connection *r_con_filling_msg;
@@ -66,7 +68,6 @@ struct ceph_osd_request {
        struct list_head  r_unsafe_item;
        struct inode *r_inode;                /* for use by callbacks */
-        struct writeback_control *r_wbc;      /* ditto */
        char              r_oid[40];          /* object name */
        int               r_oid_len;
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c
index 2e2c15eed82a..cfdd8f4388b7 100644
--- a/fs/ceph/osdmap.c
+++ b/fs/ceph/osdmap.c
@@ -1041,12 +1041,33 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
 }
 /*
+ * Return acting set for given pgid.
+ */
+int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+                        int *acting)
+{
+        int rawosds[CEPH_PG_MAX_SIZE], *osds;
+        int i, o, num = CEPH_PG_MAX_SIZE;
+        osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
+        if (!osds)
+                return -1;
+        /* primary is first up osd */
+        o = 0;
+        for (i = 0; i < num; i++)
+                if (ceph_osd_is_up(osdmap, osds[i]))
+                        acting[o++] = osds[i];
+        return o;
+}
+/*
 * Return primary osd for given pgid, or -1 if none.
 */
 int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
 {
-        int rawosds[10], *osds;
+        int rawosds[CEPH_PG_MAX_SIZE], *osds;
-        int i, num = ARRAY_SIZE(rawosds);
+        int i, num = CEPH_PG_MAX_SIZE;
        osds = calc_pg_raw(osdmap, pgid, rawosds, &num);
        if (!osds)
@@ -1054,9 +1075,7 @@ int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid)
        /* primary is first up osd */
        for (i = 0; i < num; i++)
-                if (ceph_osd_is_up(osdmap, osds[i])) {
+                if (ceph_osd_is_up(osdmap, osds[i]))
                        return osds[i];
-                        break;
-                }
        return -1;
 }
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h
index 8bc9f1e4f562..970b547e510d 100644
--- a/fs/ceph/osdmap.h
+++ b/fs/ceph/osdmap.h
@@ -120,6 +120,8 @@ extern int ceph_calc_object_layout(struct ceph_object_layout *ol,
                                   const char *oid,
                                   struct ceph_file_layout *fl,
                                   struct ceph_osdmap *osdmap);
+extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
+                               int *acting);
 extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
                                struct ceph_pg pgid);
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h
index a1fc1d017b58..fd56451a871f 100644
--- a/fs/ceph/rados.h
+++ b/fs/ceph/rados.h
@@ -58,6 +58,7 @@ struct ceph_timespec {
 #define CEPH_PG_LAYOUT_LINEAR 2
 #define CEPH_PG_LAYOUT_HYBRID 3
+#define CEPH_PG_MAX_SIZE      16  /* max # osds in a single pg */
 /*
 * placement group.
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 2b881262ef67..d5114db70453 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -869,16 +869,20 @@ skip_inode:
                                continue;
                        ci = ceph_inode(inode);
                        spin_lock(&inode->i_lock);
-                        if (!ci->i_snap_realm)
+                        if (list_empty(&ci->i_snap_realm_item)) {
-                                goto split_skip_inode;
+                                struct ceph_snap_realm *oldrealm =
-                        ceph_put_snap_realm(mdsc, ci->i_snap_realm);
+                                        ci->i_snap_realm;
-                        spin_lock(&realm->inodes_with_caps_lock);
-                        list_add(&ci->i_snap_realm_item,
+                                dout(" moving %p to split realm %llx %p\n",
-                                 &realm->inodes_with_caps);
+                                     inode, realm->ino, realm);
-                        ci->i_snap_realm = realm;
+                                spin_lock(&realm->inodes_with_caps_lock);
-                        spin_unlock(&realm->inodes_with_caps_lock);
+                                list_add(&ci->i_snap_realm_item,
-                        ceph_get_snap_realm(mdsc, realm);
+                                         &realm->inodes_with_caps);
-split_skip_inode:
+                                ci->i_snap_realm = realm;
+                                spin_unlock(&realm->inodes_with_caps_lock);
+                                ceph_get_snap_realm(mdsc, realm);
+                                ceph_put_snap_realm(mdsc, oldrealm);
+                        }
                        spin_unlock(&inode->i_lock);
                        iput(inode);
                }
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 75d02eaa1279..110857ba9269 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -47,10 +47,20 @@ const char *ceph_file_part(const char *s, int len)
 */
 static void ceph_put_super(struct super_block *s)
 {
-        struct ceph_client *cl = ceph_client(s);
+        struct ceph_client *client = ceph_sb_to_client(s);
        dout("put_super\n");
-        ceph_mdsc_close_sessions(&cl->mdsc);
+        ceph_mdsc_close_sessions(&client->mdsc);
+        /*
+         * ensure we release the bdi before put_anon_super releases
+         * the device name.
+         */
+        if (s->s_bdi == &client->backing_dev_info) {
+                bdi_unregister(&client->backing_dev_info);
+                s->s_bdi = NULL;
+        }
        return;
 }
@@ -636,6 +646,8 @@ static void ceph_destroy_client(struct ceph_client *client)
        destroy_workqueue(client->pg_inv_wq);
        destroy_workqueue(client->trunc_wq);
+        bdi_destroy(&client->backing_dev_info);
        if (client->msgr)
                ceph_messenger_destroy(client->msgr);
        mempool_destroy(client->wb_pagevec_pool);
@@ -876,14 +888,14 @@ static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client)
 {
        int err;
-        sb->s_bdi = &client->backing_dev_info;
        /* set ra_pages based on rsize mount option? */
        if (client->mount_args->rsize >= PAGE_CACHE_SIZE)
                client->backing_dev_info.ra_pages =
                        (client->mount_args->rsize + PAGE_CACHE_SIZE - 1)
                        >> PAGE_SHIFT;
        err = bdi_register_dev(&client->backing_dev_info, sb->s_dev);
+        if (!err)
+                sb->s_bdi = &client->backing_dev_info;
        return err;
 }
@@ -957,9 +969,6 @@ static void ceph_kill_sb(struct super_block *s)
        dout("kill_sb %p\n", s);
        ceph_mdsc_pre_umount(&client->mdsc);
        kill_anon_super(s);    /* will call put_super after sb is r/o */
-        if (s->s_bdi == &client->backing_dev_info)
-                bdi_unregister(&client->backing_dev_info);
-        bdi_destroy(&client->backing_dev_info);
        ceph_destroy_client(client);
 }
@@ -996,9 +1005,10 @@ static int __init init_ceph(void)
        if (ret)
                goto out_icache;
-        pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n",
+        pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n",
-                CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH,
+                CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL,
-                CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL);
+                CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT,
+                CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT);
        return 0;
 out_icache:
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index e30dfbb056c3..13513b80d87f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -10,6 +10,7 @@
 #include <linux/fs.h>
 #include <linux/mempool.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/writeback.h>
 #include <linux/slab.h>
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ecf0ffbe2b64..0c2fd17439c8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -502,6 +502,7 @@ struct dfs_info3_param {
 #define CIFS_FATTR_DFS_REFERRAL         0x1
 #define CIFS_FATTR_DELETE_PENDING       0x2
 #define CIFS_FATTR_NEED_REVAL           0x4
+#define CIFS_FATTR_INO_COLLISION        0x8
 struct cifs_fattr {
        u32             cf_flags;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 35ec11716213..29b9ea244c81 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -715,6 +715,16 @@ cifs_find_inode(struct inode *inode, void *opaque)
        if (CIFS_I(inode)->uniqueid != fattr->cf_uniqueid)
                return 0;
+        /*
+         * uh oh -- it's a directory. We can't use it since hardlinked dirs are
+         * verboten. Disable serverino and return it as if it were found, the
+         * caller can discard it, generate a uniqueid and retry the find
+         */
+        if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) {
+                fattr->cf_flags |= CIFS_FATTR_INO_COLLISION;
+                cifs_autodisable_serverino(CIFS_SB(inode->i_sb));
+        }
        return 1;
 }
@@ -734,15 +744,22 @@ cifs_iget(struct super_block *sb, struct cifs_fattr *fattr)
        unsigned long hash;
        struct inode *inode;
+retry_iget5_locked:
        cFYI(1, ("looking for uniqueid=%llu", fattr->cf_uniqueid));
        /* hash down to 32-bits on 32-bit arch */
        hash = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid);
        inode = iget5_locked(sb, hash, cifs_find_inode, cifs_init_inode, fattr);
-        /* we have fattrs in hand, update the inode */
        if (inode) {
+                /* was there a problematic inode number collision? */
+                if (fattr->cf_flags & CIFS_FATTR_INO_COLLISION) {
+                        iput(inode);
+                        fattr->cf_uniqueid = iunique(sb, ROOT_I);
+                        fattr->cf_flags &= ~CIFS_FATTR_INO_COLLISION;
+                        goto retry_iget5_locked;
+                }
                cifs_fattr_to_inode(inode, fattr);
                if (sb->s_flags & MS_NOATIME)
                        inode->i_flags |= S_NOATIME | S_NOCMTIME;
diff --git a/fs/compat.c b/fs/compat.c
index 4b6ed03cc478..05448730f840 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename,
        if (retval < 0)
                goto out;
-        current->stack_start = current->mm->start_stack;
        /* execve succeeded */
        current->fs->in_exec = 0;
        current->in_execve = 0;
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8e48b52205aa..0b502f80c691 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -645,6 +645,7 @@ static void detach_groups(struct config_group *group)
                configfs_detach_group(sd->s_element);
                child->d_inode->i_flags |= S_DEAD;
+                dont_mount(child);
                mutex_unlock(&child->d_inode->i_mutex);
@@ -840,6 +841,7 @@ static int configfs_attach_item(struct config_item *parent_item,
                        mutex_lock(&dentry->d_inode->i_mutex);
                        configfs_remove_dir(item);
                        dentry->d_inode->i_flags |= S_DEAD;
+                        dont_mount(dentry);
                        mutex_unlock(&dentry->d_inode->i_mutex);
                        d_delete(dentry);
                }
@@ -882,6 +884,7 @@ static int configfs_attach_group(struct config_item *parent_item,
                if (ret) {
                        configfs_detach_item(item);
                        dentry->d_inode->i_flags |= S_DEAD;
+                        dont_mount(dentry);
                }
                configfs_adjust_dir_dirent_depth_after_populate(sd);
                mutex_unlock(&dentry->d_inode->i_mutex);
@@ -1725,6 +1728,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
        mutex_unlock(&configfs_symlink_mutex);
        configfs_detach_group(&group->cg_item);
        dentry->d_inode->i_flags |= S_DEAD;
+        dont_mount(dentry);
        mutex_unlock(&dentry->d_inode->i_mutex);
        d_delete(dentry);
diff --git a/fs/exec.c b/fs/exec.c
index 49cdaa19e5b9..e6e94c626c2c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1387,8 +1387,6 @@ int do_execve(char * filename,
        if (retval < 0)
                goto out;
-        current->stack_start = current->mm->start_stack;
        /* execve succeeded */
        current->fs->in_exec = 0;
        current->in_execve = 0;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 157382fa6256..b66832ac33ac 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -446,10 +446,8 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
        /* initialize the mount flag and determine the default error handler */
        flag = JFS_ERR_REMOUNT_RO;
-        if (!parse_options((char *) data, sb, &newLVSize, &flag)) {
+        if (!parse_options((char *) data, sb, &newLVSize, &flag))
-                kfree(sbi);
+                goto out_kfree;
-                return -EINVAL;
-        }
        sbi->flag = flag;
 #ifdef CONFIG_JFS_POSIX_ACL
@@ -458,7 +456,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
        if (newLVSize) {
                printk(KERN_ERR "resize option for remount only\n");
-                return -EINVAL;
+                goto out_kfree;
        }
        /*
@@ -478,7 +476,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
        inode = new_inode(sb);
        if (inode == NULL) {
                ret = -ENOMEM;
-                goto out_kfree;
+                goto out_unload;
        }
        inode->i_ino = 0;
        inode->i_nlink = 1;
@@ -550,9 +548,10 @@ out_mount_failed:
        make_bad_inode(sbi->direct_inode);
        iput(sbi->direct_inode);
        sbi->direct_inode = NULL;
-out_kfree:
+out_unload:
        if (sbi->nls_tab)
                unload_nls(sbi->nls_tab);
+out_kfree:
        kfree(sbi);
        return ret;
 }
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 5866ee6e1327..d7c23ed8349a 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -333,27 +333,27 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
                goto fail;
        sb->s_root = d_alloc_root(rootdir);
-        if (!sb->s_root)
+        if (!sb->s_root) {
-                goto fail2;
+                iput(rootdir);
+                goto fail;
+        }
        super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
        if (!super->s_erase_page)
-                goto fail2;
+                goto fail;
        memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
        /* FIXME: check for read-only mounts */
        err = logfs_make_writeable(sb);
        if (err)
-                goto fail3;
+                goto fail1;
        log_super("LogFS: Finished mounting\n");
        simple_set_mnt(mnt, sb);
        return 0;
-fail3:
+fail1:
        __free_page(super->s_erase_page);
-fail2:
-        iput(rootdir);
 fail:
        iput(logfs_super(sb)->s_master_inode);
        return -EIO;
diff --git a/fs/namei.c b/fs/namei.c
index a7dce91a7e42..b86b96fe1dc3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1641,7 +1641,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
        if (nd->last.name[nd->last.len]) {
                if (open_flag & O_CREAT)
                        goto exit;
-                nd->flags |= LOOKUP_DIRECTORY;
+                nd->flags |= LOOKUP_DIRECTORY | LOOKUP_FOLLOW;
        }
        /* just plain open? */
@@ -1830,6 +1830,8 @@ reval:
        }
        if (open_flag & O_DIRECTORY)
                nd.flags |= LOOKUP_DIRECTORY;
+        if (!(open_flag & O_NOFOLLOW))
+                nd.flags |= LOOKUP_FOLLOW;
        filp = do_last(&nd, &path, open_flag, acc_mode, mode, pathname);
        while (unlikely(!filp)) { /* trailing symlink */
                struct path holder;
@@ -1837,7 +1839,7 @@ reval:
                void *cookie;
                error = -ELOOP;
                /* S_ISDIR part is a temporary automount kludge */
-                if ((open_flag & O_NOFOLLOW) && !S_ISDIR(inode->i_mode))
+                if (!(nd.flags & LOOKUP_FOLLOW) && !S_ISDIR(inode->i_mode))
                        goto exit_dput;
                if (count++ == 32)
                        goto exit_dput;
@@ -2174,8 +2176,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
                error = security_inode_rmdir(dir, dentry);
                if (!error) {
                        error = dir->i_op->rmdir(dir, dentry);
-                        if (!error)
+                        if (!error) {
                                dentry->d_inode->i_flags |= S_DEAD;
+                                dont_mount(dentry);
+                        }
                }
        }
        mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2259,7 +2263,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
                if (!error) {
                        error = dir->i_op->unlink(dir, dentry);
                        if (!error)
-                                dentry->d_inode->i_flags |= S_DEAD;
+                                dont_mount(dentry);
                }
        }
        mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2570,17 +2574,20 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
                return error;
        target = new_dentry->d_inode;
-        if (target) {
+        if (target)
                mutex_lock(&target->i_mutex);
-                dentry_unhash(new_dentry);
-        }
        if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
                error = -EBUSY;
-        else 
+        else {
+                if (target)
+                        dentry_unhash(new_dentry);
                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+        }
        if (target) {
-                if (!error)
+                if (!error) {
                        target->i_flags |= S_DEAD;
+                        dont_mount(new_dentry);
+                }
                mutex_unlock(&target->i_mutex);
                if (d_unhashed(new_dentry))
                        d_rehash(new_dentry);
@@ -2612,7 +2619,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
                error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
        if (!error) {
                if (target)
-                        target->i_flags |= S_DEAD;
+                        dont_mount(new_dentry);
                if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
                        d_move(old_dentry, new_dentry);
        }
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8ab5c70..f20cb57d1067 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1432,7 +1432,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
        err = -ENOENT;
        mutex_lock(&path->dentry->d_inode->i_mutex);
-        if (IS_DEADDIR(path->dentry->d_inode))
+        if (cant_mount(path->dentry))
                goto out_unlock;
        err = security_sb_check_sb(mnt, path);
@@ -1623,7 +1623,7 @@ static int do_move_mount(struct path *path, char *old_name)
        err = -ENOENT;
        mutex_lock(&path->dentry->d_inode->i_mutex);
-        if (IS_DEADDIR(path->dentry->d_inode))
+        if (cant_mount(path->dentry))
                goto out1;
        if (d_unlinked(path->dentry))
@@ -2234,7 +2234,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
        if (!check_mnt(root.mnt))
                goto out2;
        error = -ENOENT;
-        if (IS_DEADDIR(new.dentry->d_inode))
+        if (cant_mount(old.dentry))
                goto out2;
        if (d_unlinked(new.dentry))
                goto out2;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 15671245c6ee..ea61d26e7871 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -24,6 +24,8 @@
 static void nfs_do_free_delegation(struct nfs_delegation *delegation)
 {
+        if (delegation->cred)
+                put_rpccred(delegation->cred);
        kfree(delegation);
 }
@@ -36,13 +38,7 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
 static void nfs_free_delegation(struct nfs_delegation *delegation)
 {
-        struct rpc_cred *cred;
-        cred = rcu_dereference(delegation->cred);
-        rcu_assign_pointer(delegation->cred, NULL);
        call_rcu(&delegation->rcu, nfs_free_delegation_callback);
-        if (cred)
-                put_rpccred(cred);
 }
 void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
@@ -129,21 +125,35 @@ again:
 */
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
 {
-        struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+        struct nfs_delegation *delegation;
-        struct rpc_cred *oldcred;
+        struct rpc_cred *oldcred = NULL;
-        if (delegation == NULL)
+        rcu_read_lock();
-                return;
+        delegation = rcu_dereference(NFS_I(inode)->delegation);
-        memcpy(delegation->stateid.data, res->delegation.data,
+        if (delegation != NULL) {
-                        sizeof(delegation->stateid.data));
+                spin_lock(&delegation->lock);
-        delegation->type = res->delegation_type;
+                if (delegation->inode != NULL) {
-        delegation->maxsize = res->maxsize;
+                        memcpy(delegation->stateid.data, res->delegation.data,
-        oldcred = delegation->cred;
+                               sizeof(delegation->stateid.data));
-        delegation->cred = get_rpccred(cred);
+                        delegation->type = res->delegation_type;
-        clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+                        delegation->maxsize = res->maxsize;
-        NFS_I(inode)->delegation_state = delegation->type;
+                        oldcred = delegation->cred;
-        smp_wmb();
+                        delegation->cred = get_rpccred(cred);
-        put_rpccred(oldcred);
+                        clear_bit(NFS_DELEGATION_NEED_RECLAIM,
+                                  &delegation->flags);
+                        NFS_I(inode)->delegation_state = delegation->type;
+                        spin_unlock(&delegation->lock);
+                        put_rpccred(oldcred);
+                        rcu_read_unlock();
+                } else {
+                        /* We appear to have raced with a delegation return. */
+                        spin_unlock(&delegation->lock);
+                        rcu_read_unlock();
+                        nfs_inode_set_delegation(inode, cred, res);
+                }
+        } else {
+                rcu_read_unlock();
+        }
 }
 static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
@@ -166,9 +176,13 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation
        return inode;
 }
-static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
+static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi,
+                                                           const nfs4_stateid *stateid,
+                                                           struct nfs_client *clp)
 {
-        struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+        struct nfs_delegation *delegation =
+                rcu_dereference_protected(nfsi->delegation,
+                                          lockdep_is_held(&clp->cl_lock));
        if (delegation == NULL)
                goto nomatch;
@@ -195,7 +209,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 {
        struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
        struct nfs_inode *nfsi = NFS_I(inode);
-        struct nfs_delegation *delegation;
+        struct nfs_delegation *delegation, *old_delegation;
        struct nfs_delegation *freeme = NULL;
        int status = 0;
@@ -213,10 +227,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
        spin_lock_init(&delegation->lock);
        spin_lock(&clp->cl_lock);
-        if (rcu_dereference(nfsi->delegation) != NULL) {
+        old_delegation = rcu_dereference_protected(nfsi->delegation,
-                if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
+                                                   lockdep_is_held(&clp->cl_lock));
-                                        sizeof(delegation->stateid)) == 0 &&
+        if (old_delegation != NULL) {
-                                delegation->type == nfsi->delegation->type) {
+                if (memcmp(&delegation->stateid, &old_delegation->stateid,
+                                        sizeof(old_delegation->stateid)) == 0 &&
+                                delegation->type == old_delegation->type) {
                        goto out;
                }
                /*
@@ -226,12 +242,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
                dfprintk(FILE, "%s: server %s handed out "
                                "a duplicate delegation!\n",
                                __func__, clp->cl_hostname);
-                if (delegation->type <= nfsi->delegation->type) {
+                if (delegation->type <= old_delegation->type) {
                        freeme = delegation;
                        delegation = NULL;
                        goto out;
                }
-                freeme = nfs_detach_delegation_locked(nfsi, NULL);
+                freeme = nfs_detach_delegation_locked(nfsi, NULL, clp);
        }
        list_add_rcu(&delegation->super_list, &clp->cl_delegations);
        nfsi->delegation_state = delegation->type;
@@ -301,7 +317,7 @@ restart:
                if (inode == NULL)
                        continue;
                spin_lock(&clp->cl_lock);
-                delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
+                delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
                spin_unlock(&clp->cl_lock);
                rcu_read_unlock();
                if (delegation != NULL) {
@@ -330,9 +346,9 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_delegation *delegation;
-        if (rcu_dereference(nfsi->delegation) != NULL) {
+        if (rcu_access_pointer(nfsi->delegation) != NULL) {
                spin_lock(&clp->cl_lock);
-                delegation = nfs_detach_delegation_locked(nfsi, NULL);
+                delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
                spin_unlock(&clp->cl_lock);
                if (delegation != NULL)
                        nfs_do_return_delegation(inode, delegation, 0);
@@ -346,9 +362,9 @@ int nfs_inode_return_delegation(struct inode *inode)
        struct nfs_delegation *delegation;
        int err = 0;
-        if (rcu_dereference(nfsi->delegation) != NULL) {
+        if (rcu_access_pointer(nfsi->delegation) != NULL) {
                spin_lock(&clp->cl_lock);
-                delegation = nfs_detach_delegation_locked(nfsi, NULL);
+                delegation = nfs_detach_delegation_locked(nfsi, NULL, clp);
                spin_unlock(&clp->cl_lock);
                if (delegation != NULL) {
                        nfs_msync_inode(inode);
@@ -526,7 +542,7 @@ restart:
                if (inode == NULL)
                        continue;
                spin_lock(&clp->cl_lock);
-                delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
+                delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL, clp);
                spin_unlock(&clp->cl_lock);
                rcu_read_unlock();
                if (delegation != NULL)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 23c06f77f4ca..4eb9baa33081 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -443,8 +443,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        if (size_change)
                put_write_access(inode);
        if (!err)
-                if (EX_ISSYNC(fhp->fh_export))
+                commit_metadata(fhp);
-                        write_inode_now(inode, 1);
 out:
        return err;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0cdbc5e7655a..48145f505a6a 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -749,6 +749,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
        sb->s_export_op = &nilfs_export_ops;
        sb->s_root = NULL;
        sb->s_time_gran = 1;
+        sb->s_bdi = nilfs->ns_bdi;
        err = load_nilfs(nilfs, sbi);
        if (err)
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 3e56dbffe729..b3a159b21cfd 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -15,6 +15,7 @@ config INOTIFY
 config INOTIFY_USER
        bool "Inotify support for userspace"
+        select ANON_INODES
        select FSNOTIFY
        default y
        ---help---
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 1afb0a10229f..e27960cd76ab 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -28,6 +28,7 @@
 #include <linux/path.h> /* struct path */
 #include <linux/slab.h> /* kmem_* */
 #include <linux/types.h>
+#include <linux/sched.h>
 #include "inotify.h"
@@ -146,6 +147,7 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
        idr_for_each(&group->inotify_data.idr, idr_callback, group);
        idr_remove_all(&group->inotify_data.idr);
        idr_destroy(&group->inotify_data.idr);
+        free_uid(group->inotify_data.user);
 }
 void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 472cdf29ef82..e46ca685b9be 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -546,21 +546,24 @@ retry:
        if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
                goto out_err;
+        /* we are putting the mark on the idr, take a reference */
+        fsnotify_get_mark(&tmp_ientry->fsn_entry);
        spin_lock(&group->inotify_data.idr_lock);
        ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
                                group->inotify_data.last_wd+1,
                                &tmp_ientry->wd);
        spin_unlock(&group->inotify_data.idr_lock);
        if (ret) {
+                /* we didn't get on the idr, drop the idr reference */
+                fsnotify_put_mark(&tmp_ientry->fsn_entry);
                /* idr was out of memory allocate and try again */
                if (ret == -EAGAIN)
                        goto retry;
                goto out_err;
        }
-        /* we put the mark on the idr, take a reference */
-        fsnotify_get_mark(&tmp_ientry->fsn_entry);
        /* we are on the idr, now get on the inode */
        ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode);
        if (ret) {
@@ -578,16 +581,13 @@ retry:
        /* return the watch descriptor for this new entry */
        ret = tmp_ientry->wd;
-        /* match the ref from fsnotify_init_markentry() */
-        fsnotify_put_mark(&tmp_ientry->fsn_entry);
        /* if this mark added a new event update the group mask */
        if (mask & ~group->mask)
                fsnotify_recalc_group_mask(group);
 out_err:
-        if (ret < 0)
+        /* match the ref from fsnotify_init_markentry() */
-                kmem_cache_free(inotify_inode_mark_cachep, tmp_ientry);
+        fsnotify_put_mark(&tmp_ientry->fsn_entry);
        return ret;
 }
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index ecebb2276790..f9d5d3ffc75a 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -406,6 +406,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
                                struct buffer_head *bh)
 {
        int ret = 0;
+        struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
        mlog_entry_void();
@@ -425,6 +426,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
        get_bh(bh); /* for end_buffer_write_sync() */
        bh->b_end_io = end_buffer_write_sync;
+        ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check);
        submit_bh(WRITE, bh);
        wait_on_buffer(bh);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index a795eb91f4ea..12d5eb78a11a 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -184,9 +184,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
        BUG_ON(!lksb);
        /* only updates if this node masters the lockres */
+        spin_lock(&res->spinlock);
        if (res->owner == dlm->node_num) {
-                spin_lock(&res->spinlock);
                /* check the lksb flags for the direction */
                if (lksb->flags & DLM_LKSB_GET_LVB) {
                        mlog(0, "getting lvb from lockres for %s node\n",
@@ -201,8 +200,8 @@ static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
                 * here. In the future we might want to clear it at the time
                 * the put is actually done.
                 */
-                spin_unlock(&res->spinlock);
        }
+        spin_unlock(&res->spinlock);
        /* reset any lvb flags on the lksb */
        lksb->flags &= ~(DLM_LKSB_PUT_LVB|DLM_LKSB_GET_LVB);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 1b0de157a08c..b83d6107a1f5 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -112,20 +112,20 @@ MODULE_PARM_DESC(capabilities, DLMFS_CAPABILITIES);
 * O_RDONLY -> PRMODE level
 * O_WRONLY -> EXMODE level
 *
- * O_NONBLOCK -> LKM_NOQUEUE
+ * O_NONBLOCK -> NOQUEUE
 */
 static int dlmfs_decode_open_flags(int open_flags,
                                   int *level,
                                   int *flags)
 {
        if (open_flags & (O_WRONLY|O_RDWR))
-                *level = LKM_EXMODE;
+                *level = DLM_LOCK_EX;
        else
-                *level = LKM_PRMODE;
+                *level = DLM_LOCK_PR;
        *flags = 0;
        if (open_flags & O_NONBLOCK)
-                *flags |= LKM_NOQUEUE;
+                *flags |= DLM_LKF_NOQUEUE;
        return 0;
 }
@@ -166,7 +166,7 @@ static int dlmfs_file_open(struct inode *inode,
                 * to be able userspace to be able to distinguish a
                 * valid lock request from one that simply couldn't be
                 * granted. */
-                if (flags & LKM_NOQUEUE && status == -EAGAIN)
+                if (flags & DLM_LKF_NOQUEUE && status == -EAGAIN)
                        status = -ETXTBSY;
                kfree(fp);
                goto bail;
@@ -193,7 +193,7 @@ static int dlmfs_file_release(struct inode *inode,
        status = 0;
        if (fp) {
                level = fp->fp_lock_level;
-                if (level != LKM_IVMODE)
+                if (level != DLM_LOCK_IV)
                        user_dlm_cluster_unlock(&ip->ip_lockres, level);
                kfree(fp);
@@ -262,7 +262,7 @@ static ssize_t dlmfs_file_read(struct file *filp,
        if ((count + *ppos) > i_size_read(inode))
                readlen = i_size_read(inode) - *ppos;
        else
-                readlen = count - *ppos;
+                readlen = count;
        lvb_buf = kmalloc(readlen, GFP_NOFS);
        if (!lvb_buf)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 17947dc8341e..a5fbd9cea968 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -684,6 +684,7 @@ restarted_transaction:
                if (why == RESTART_META) {
                        mlog(0, "restarting function.\n");
                        restart_func = 1;
+                        status = 0;
                } else {
                        BUG_ON(why != RESTART_TRANS);
@@ -1981,18 +1982,18 @@ relock:
        /* communicate with ocfs2_dio_end_io */
        ocfs2_iocb_set_rw_locked(iocb, rw_level);
-        if (direct_io) {
+        ret = generic_segment_checks(iov, &nr_segs, &ocount,
-                ret = generic_segment_checks(iov, &nr_segs, &ocount,
+                                     VERIFY_READ);
-                                             VERIFY_READ);
+        if (ret)
-                if (ret)
+                goto out_dio;
-                        goto out_dio;
-                count = ocount;
+        count = ocount;
-                ret = generic_write_checks(file, ppos, &count,
+        ret = generic_write_checks(file, ppos, &count,
-                                           S_ISBLK(inode->i_mode));
+                                   S_ISBLK(inode->i_mode));
-                if (ret)
+        if (ret)
-                        goto out_dio;
+                goto out_dio;
+        if (direct_io) {
                written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos,
                                                    ppos, count, ocount);
                if (written < 0) {
@@ -2007,7 +2008,10 @@ relock:
                        goto out_dio;
                }
        } else {
-                written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
+                current->backing_dev_info = file->f_mapping->backing_dev_info;
+                written = generic_file_buffered_write(iocb, iov, nr_segs, *ppos,
+                                                      ppos, count, 0);
+                current->backing_dev_info = NULL;
        }
 out_dio:
@@ -2021,9 +2025,9 @@ out_dio:
                if (ret < 0)
                        written = ret;
-                if (!ret && (old_size != i_size_read(inode) ||
+                if (!ret && ((old_size != i_size_read(inode)) ||
-                    old_clusters != OCFS2_I(inode)->ip_clusters ||
+                             (old_clusters != OCFS2_I(inode)->ip_clusters) ||
-                    has_refcount)) {
+                             has_refcount)) {
                        ret = jbd2_journal_force_commit(osb->journal->j_journal);
                        if (ret < 0)
                                written = ret;
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 07cc8bb68b6d..af189887201c 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -558,6 +558,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
                handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
                if (IS_ERR(handle)) {
                        status = PTR_ERR(handle);
+                        handle = NULL;
                        mlog_errno(status);
                        goto out;
                }
@@ -639,11 +640,13 @@ static int ocfs2_remove_inode(struct inode *inode,
                goto bail_unlock;
        }
-        status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
+        if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
-                                  orphan_dir_bh);
+                status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
-        if (status < 0) {
+                                          orphan_dir_bh);
-                mlog_errno(status);
+                if (status < 0) {
-                goto bail_commit;
+                        mlog_errno(status);
+                        goto bail_commit;
+                }
        }
        /* set the inodes dtime */
@@ -722,38 +725,39 @@ static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb,
 static int ocfs2_wipe_inode(struct inode *inode,
                            struct buffer_head *di_bh)
 {
-        int status, orphaned_slot;
+        int status, orphaned_slot = -1;
        struct inode *orphan_dir_inode = NULL;
        struct buffer_head *orphan_dir_bh = NULL;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-        struct ocfs2_dinode *di;
+        struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
-        di = (struct ocfs2_dinode *) di_bh->b_data;
+        if (!(OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
-        orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
+                orphaned_slot = le16_to_cpu(di->i_orphaned_slot);
-        status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
+                status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot);
-        if (status)
+                if (status)
-                return status;
+                        return status;
-        orphan_dir_inode = ocfs2_get_system_file_inode(osb,
+                orphan_dir_inode = ocfs2_get_system_file_inode(osb,
-                                                       ORPHAN_DIR_SYSTEM_INODE,
+                                                               ORPHAN_DIR_SYSTEM_INODE,
-                                                       orphaned_slot);
+                                                               orphaned_slot);
-        if (!orphan_dir_inode) {
+                if (!orphan_dir_inode) {
-                status = -EEXIST;
+                        status = -EEXIST;
-                mlog_errno(status);
+                        mlog_errno(status);
-                goto bail;
+                        goto bail;
-        }
+                }
-        /* Lock the orphan dir. The lock will be held for the entire
+                /* Lock the orphan dir. The lock will be held for the entire
-         * delete_inode operation. We do this now to avoid races with
+                 * delete_inode operation. We do this now to avoid races with
-         * recovery completion on other nodes. */
+                 * recovery completion on other nodes. */
-        mutex_lock(&orphan_dir_inode->i_mutex);
+                mutex_lock(&orphan_dir_inode->i_mutex);
-        status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
+                status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
-        if (status < 0) {
+                if (status < 0) {
-                mutex_unlock(&orphan_dir_inode->i_mutex);
+                        mutex_unlock(&orphan_dir_inode->i_mutex);
-                mlog_errno(status);
+                        mlog_errno(status);
-                goto bail;
+                        goto bail;
+                }
        }
        /* we do this while holding the orphan dir lock because we
@@ -794,6 +798,9 @@ static int ocfs2_wipe_inode(struct inode *inode,
                mlog_errno(status);
 bail_unlock_dir:
+        if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)
+                return status;
        ocfs2_inode_unlock(orphan_dir_inode, 1);
        mutex_unlock(&orphan_dir_inode->i_mutex);
        brelse(orphan_dir_bh);
@@ -889,7 +896,8 @@ static int ocfs2_query_inode_wipe(struct inode *inode,
        /* Do some basic inode verification... */
        di = (struct ocfs2_dinode *) di_bh->b_data;
-        if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL))) {
+        if (!(di->i_flags & cpu_to_le32(OCFS2_ORPHANED_FL)) &&
+            !(oi->ip_flags & OCFS2_INODE_SKIP_ORPHAN_DIR)) {
                /*
                 * Inodes in the orphan dir must have ORPHANED_FL.  The only
                 * inodes that come back out of the orphan dir are reflink
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ba4fe07b293c..0b28e1921a39 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -100,6 +100,8 @@ struct ocfs2_inode_info
 #define OCFS2_INODE_MAYBE_ORPHANED      0x00000020
 /* Does someone have the file open O_DIRECT */
 #define OCFS2_INODE_OPEN_DIRECT         0x00000040
+/* Tell the inode wipe code it's not in orphan dir */
+#define OCFS2_INODE_SKIP_ORPHAN_DIR     0x00000080
 static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
 {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b1eb50ae4097..4cbb18f26c5f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -408,23 +408,28 @@ static int ocfs2_mknod(struct inode *dir,
                }
        }
-        status = ocfs2_add_entry(handle, dentry, inode,
+        /*
-                                 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
+         * Do this before adding the entry to the directory. We
-                                 &lookup);
+         * also set d_op after success so that ->d_iput() will cleanup
-        if (status < 0) {
+         * the dentry lock even if ocfs2_add_entry() fails below.
+         */
+        status = ocfs2_dentry_attach_lock(dentry, inode,
+                                          OCFS2_I(dir)->ip_blkno);
+        if (status) {
                mlog_errno(status);
                goto leave;
        }
+        dentry->d_op = &ocfs2_dentry_ops;
-        status = ocfs2_dentry_attach_lock(dentry, inode,
+        status = ocfs2_add_entry(handle, dentry, inode,
-                                          OCFS2_I(dir)->ip_blkno);
+                                 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
-        if (status) {
+                                 &lookup);
+        if (status < 0) {
                mlog_errno(status);
                goto leave;
        }
        insert_inode_hash(inode);
-        dentry->d_op = &ocfs2_dentry_ops;
        d_instantiate(dentry, inode);
        status = 0;
 leave:
@@ -445,11 +450,6 @@ leave:
        ocfs2_free_dir_lookup_result(&lookup);
-        if ((status < 0) && inode) {
-                clear_nlink(inode);
-                iput(inode);
-        }
        if (inode_ac)
                ocfs2_free_alloc_context(inode_ac);
@@ -459,6 +459,17 @@ leave:
        if (meta_ac)
                ocfs2_free_alloc_context(meta_ac);
+        /*
+         * We should call iput after the i_mutex of the bitmap has been
+         * unlocked in ocfs2_free_alloc_context, or the
+         * ocfs2_delete_inode will mutex_lock again.
+         */
+        if ((status < 0) && inode) {
+                OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
+                clear_nlink(inode);
+                iput(inode);
+        }
        mlog_exit(status);
        return status;
@@ -1771,22 +1782,27 @@ static int ocfs2_symlink(struct inode *dir,
                }
        }
-        status = ocfs2_add_entry(handle, dentry, inode,
+        /*
-                                 le64_to_cpu(fe->i_blkno), parent_fe_bh,
+         * Do this before adding the entry to the directory. We
-                                 &lookup);
+         * also set d_op after success so that ->d_iput() will cleanup
-        if (status < 0) {
+         * the dentry lock even if ocfs2_add_entry() fails below.
+         */
+        status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
+        if (status) {
                mlog_errno(status);
                goto bail;
        }
+        dentry->d_op = &ocfs2_dentry_ops;
-        status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
+        status = ocfs2_add_entry(handle, dentry, inode,
-        if (status) {
+                                 le64_to_cpu(fe->i_blkno), parent_fe_bh,
+                                 &lookup);
+        if (status < 0) {
                mlog_errno(status);
                goto bail;
        }
        insert_inode_hash(inode);
-        dentry->d_op = &ocfs2_dentry_ops;
        d_instantiate(dentry, inode);
 bail:
        if (status < 0 && did_quota)
@@ -1811,6 +1827,7 @@ bail:
        if (xattr_ac)
                ocfs2_free_alloc_context(xattr_ac);
        if ((status < 0) && inode) {
+                OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
                clear_nlink(inode);
                iput(inode);
        }
@@ -1976,6 +1993,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
        }
        le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
+        OCFS2_I(inode)->ip_flags &= ~OCFS2_INODE_SKIP_ORPHAN_DIR;
        /* Record which orphan dir our inode now resides
         * in. delete_inode will use this to determine which orphan
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index bd96f6c7877e..5cbcd0f008fc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4083,6 +4083,9 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
        di->i_attr = s_di->i_attr;
        if (preserve) {
+                t_inode->i_uid = s_inode->i_uid;
+                t_inode->i_gid = s_inode->i_gid;
+                t_inode->i_mode = s_inode->i_mode;
                di->i_uid = s_di->i_uid;
                di->i_gid = s_di->i_gid;
                di->i_mode = s_di->i_mode;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index e51f2ec2c5e5..885ab5513ac5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -81,7 +81,6 @@
 #include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
-#include <linux/swapops.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
                rsslim,
                mm ? mm->start_code : 0,
                mm ? mm->end_code : 0,
-                (permitted && mm) ? task->stack_start : 0,
+                (permitted && mm) ? mm->start_stack : 0,
                esp,
                eip,
                /* The signal information here is obsolete.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 070553427dd5..47f5b145f56e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
                                } else if (vma->vm_start <= mm->start_stack &&
                                           vma->vm_end >= mm->start_stack) {
                                        name = "[stack]";
-                                } else {
-                                        unsigned long stack_start;
-                                        struct proc_maps_private *pmp;
-                                        pmp = m->private;
-                                        stack_start = pmp->task->stack_start;
-                                        if (vma->vm_start <= stack_start &&
-                                            vma->vm_end >= stack_start) {
-                                                pad_len_spaces(m, len);
-                                                seq_printf(m,
-                                                 "[threadstack:%08lx]",
-#ifdef CONFIG_STACK_GROWSUP
-                                                 vma->vm_end - stack_start
-#else
-                                                 stack_start - vma->vm_start
-#endif
-                                                );
-                                        }
                                }
                        } else {
                                name = "[vdso]";
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 4e50286a4cc3..1dabed286b4c 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -164,8 +164,8 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
                                                        name, de->name))
                                        goto found;
                        }
+                        dir_put_page(page);
                }
-                dir_put_page(page);
                if (++n >= npages)
                        n = 0;
author	J. Bruce Fields <bfields@citi.umich.edu>	2010-06-08 20:05:18 -0400
committer	J. Bruce Fields <bfields@citi.umich.edu>	2010-06-08 20:05:18 -0400
commit	44b56603c4c476b845a824cff6fe905c6268b2a1 (patch)
tree	b7e792414fef2390718a657765719fbbb529ce84 /fs
parent	c3935e30495869dd611e1cd62253c94ebc7c6c04 (diff)
parent	b160fdabe93a8a53094f90f02bf4dcb500782aab (diff)