24 files changed, 318 insertions, 212 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 2d3e32ebfd15..8729cf68d2fe 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1552,7 +1552,6 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 {
        int ret;
        int type;
-        struct btrfs_tree_block_info *info;
        struct btrfs_extent_inline_ref *eiref;
        if (*ptr == (unsigned long)-1)
@@ -1573,9 +1572,17 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
        }
        /* we can treat both ref types equally here */
-        info = (struct btrfs_tree_block_info *)(ei + 1);
        *out_root = btrfs_extent_inline_ref_offset(eb, eiref);
-        *out_level = btrfs_tree_block_level(eb, info);
+        if (key->type == BTRFS_EXTENT_ITEM_KEY) {
+                struct btrfs_tree_block_info *info;
+                info = (struct btrfs_tree_block_info *)(ei + 1);
+                *out_level = btrfs_tree_block_level(eb, info);
+        } else {
+                ASSERT(key->type == BTRFS_METADATA_ITEM_KEY);
+                *out_level = (u8)key->offset;
+        }
        if (ret == 1)
                *ptr = (unsigned long)-1;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 054577bddaf2..de4e70fb3cbb 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1857,6 +1857,14 @@ int btrfs_delayed_delete_inode_ref(struct inode *inode)
 {
        struct btrfs_delayed_node *delayed_node;
+        /*
+         * We don't do delayed inode updates during log recovery because it
+         * leads to ENOSPC problems.  This means we also can't do
+         * delayed inode refs.
+         */
+        if (BTRFS_I(inode)->root->fs_info->log_root_recovering)
+                return -EAGAIN;
        delayed_node = btrfs_get_or_create_delayed_node(inode);
        if (IS_ERR(delayed_node))
                return PTR_ERR(delayed_node);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a80b97100d90..15116585e714 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3139,9 +3139,11 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
-        if (ret < 0)
+        if (ret) {
+                if (ret > 0)
+                        ret = -ENOENT;
                goto fail;
-        BUG_ON(ret); /* Corruption */
+        }
        leaf = path->nodes[0];
        bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
@@ -3149,11 +3151,9 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_release_path(path);
 fail:
-        if (ret) {
+        if (ret)
                btrfs_abort_transaction(trans, root, ret);
-                return ret;
+        return ret;
-        }
-        return 0;
 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e687bb0dc73a..8bf326affb94 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6255,8 +6255,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 out_fail:
        btrfs_end_transaction(trans, root);
-        if (drop_on_err)
+        if (drop_on_err) {
+                inode_dec_link_count(inode);
                iput(inode);
+        }
        btrfs_balance_delayed_items(root);
        btrfs_btree_balance_dirty(root);
        return err;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index f2bb13a23f86..9e1569ffbf6e 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2607,9 +2607,9 @@ static int scrub_extent_for_parity(struct scrub_parity *sparity,
                ret = scrub_pages_for_parity(sparity, logical, l, physical, dev,
                                             flags, gen, mirror_num,
                                             have_csum ? csum : NULL);
-skip:
                if (ret)
                        return ret;
+skip:
                len -= l;
                logical += l;
                physical += l;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index f5013d92a7e6..c81c0e004588 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1416,7 +1416,7 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
                }
        }
-        dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n",
+        dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n",
             inode, ceph_vinop(inode), len, locked_page);
        if (len > 0) {
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e5d3eadf47b1..bed43081720f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5166,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        /* fallback to generic here if not in extents fmt */
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
-                return __generic_block_fiemap(inode, fieinfo, start, len,
+                return generic_block_fiemap(inode, fieinfo, start, len,
-                                              ext4_get_block);
+                        ext4_get_block);
        if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
                return -EBADR;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 513c12cf444c..8131be8c0af3 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -273,19 +273,24 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 * we determine this extent as a data or a hole according to whether the
 * page cache has data or not.
 */
-static int ext4_find_unwritten_pgoff(struct inode *inode, int whence,
+static int ext4_find_unwritten_pgoff(struct inode *inode,
-                                     loff_t endoff, loff_t *offset)
+                                     int whence,
+                                     struct ext4_map_blocks *map,
+                                     loff_t *offset)
 {
        struct pagevec pvec;
+        unsigned int blkbits;
        pgoff_t index;
        pgoff_t end;
+        loff_t endoff;
        loff_t startoff;
        loff_t lastoff;
        int found = 0;
+        blkbits = inode->i_sb->s_blocksize_bits;
        startoff = *offset;
        lastoff = startoff;
+        endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
        index = startoff >> PAGE_CACHE_SHIFT;
        end = endoff >> PAGE_CACHE_SHIFT;
@@ -403,144 +408,147 @@ out:
 static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
 {
        struct inode *inode = file->f_mapping->host;
-        struct fiemap_extent_info fie;
+        struct ext4_map_blocks map;
-        struct fiemap_extent ext[2];
+        struct extent_status es;
-        loff_t next;
+        ext4_lblk_t start, last, end;
-        int i, ret = 0;
+        loff_t dataoff, isize;
+        int blkbits;
+        int ret = 0;
        mutex_lock(&inode->i_mutex);
-        if (offset >= inode->i_size) {
+        isize = i_size_read(inode);
+        if (offset >= isize) {
                mutex_unlock(&inode->i_mutex);
                return -ENXIO;
        }
-        fie.fi_flags = 0;
-        fie.fi_extents_max = 2;
+        blkbits = inode->i_sb->s_blocksize_bits;
-        fie.fi_extents_start = (struct fiemap_extent __user *) &ext;
+        start = offset >> blkbits;
-        while (1) {
+        last = start;
-                mm_segment_t old_fs = get_fs();
+        end = isize >> blkbits;
+        dataoff = offset;
-                fie.fi_extents_mapped = 0;
-                memset(ext, 0, sizeof(*ext) * fie.fi_extents_max);
+        do {
+                map.m_lblk = last;
-                set_fs(get_ds());
+                map.m_len = end - last + 1;
-                ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
+                ret = ext4_map_blocks(NULL, inode, &map, 0);
-                set_fs(old_fs);
+                if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
-                if (ret)
+                        if (last != start)
+                                dataoff = (loff_t)last << blkbits;
                        break;
+                }
-                /* No extents found, EOF */
+                /*
-                if (!fie.fi_extents_mapped) {
+                 * If there is a delayed extent at this offset,
-                        ret = -ENXIO;
+                 * it is treated as data.
+                 */
+                ext4_es_find_delayed_extent_range(inode, last, last, &es);
+                if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
+                        if (last != start)
+                                dataoff = (loff_t)last << blkbits;
                        break;
                }
-                for (i = 0; i < fie.fi_extents_mapped; i++) {
-                        next = (loff_t)(ext[i].fe_length + ext[i].fe_logical);
-                        if (offset < (loff_t)ext[i].fe_logical)
+                /*
-                                offset = (loff_t)ext[i].fe_logical;
+                 * If there is an unwritten extent at this offset,
-                        /*
+                 * it is treated as data or as a hole, depending on
-                         * If extent is not unwritten, then it contains valid
+                 * whether the page cache has data for it.
-                         * data, mapped or delayed.
+                 */
-                         */
+                if (map.m_flags & EXT4_MAP_UNWRITTEN) {
-                        if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN))
+                        int unwritten;
-                                goto out;
+                        unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
+                                                              &map, &dataoff);
+                        if (unwritten)
+                                break;
+                }
-                        /*
+                last++;
-                         * If there is a unwritten extent at this offset,
+                dataoff = (loff_t)last << blkbits;
-                         * it will be as a data or a hole according to page
+        } while (last <= end);
-                         * cache that has data or not.
-                         */
-                        if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
-                                                      next, &offset))
-                                goto out;
-                        if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) {
-                                ret = -ENXIO;
-                                goto out;
-                        }
-                        offset = next;
-                }
-        }
-        if (offset > inode->i_size)
-                offset = inode->i_size;
-out:
        mutex_unlock(&inode->i_mutex);
-        if (ret)
-                return ret;
-        return vfs_setpos(file, offset, maxsize);
+        if (dataoff > isize)
+                return -ENXIO;
+        return vfs_setpos(file, dataoff, maxsize);
 }
 /*
- * ext4_seek_hole() retrieves the offset for SEEK_HOLE
+ * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
 */
 static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
 {
        struct inode *inode = file->f_mapping->host;
-        struct fiemap_extent_info fie;
+        struct ext4_map_blocks map;
-        struct fiemap_extent ext[2];
+        struct extent_status es;
-        loff_t next;
+        ext4_lblk_t start, last, end;
-        int i, ret = 0;
+        loff_t holeoff, isize;
+        int blkbits;
+        int ret = 0;
        mutex_lock(&inode->i_mutex);
-        if (offset >= inode->i_size) {
+        isize = i_size_read(inode);
+        if (offset >= isize) {
                mutex_unlock(&inode->i_mutex);
                return -ENXIO;
        }
-        fie.fi_flags = 0;
+        blkbits = inode->i_sb->s_blocksize_bits;
-        fie.fi_extents_max = 2;
+        start = offset >> blkbits;
-        fie.fi_extents_start = (struct fiemap_extent __user *)&ext;
+        last = start;
-        while (1) {
+        end = isize >> blkbits;
-                mm_segment_t old_fs = get_fs();
+        holeoff = offset;
-                fie.fi_extents_mapped = 0;
-                memset(ext, 0, sizeof(*ext));
-                set_fs(get_ds());
+        do {
-                ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
+                map.m_lblk = last;
-                set_fs(old_fs);
+                map.m_len = end - last + 1;
-                if (ret)
+                ret = ext4_map_blocks(NULL, inode, &map, 0);
-                        break;
+                if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
+                        last += ret;
+                        holeoff = (loff_t)last << blkbits;
+                        continue;
+                }
-                /* No extents found */
+                /*
-                if (!fie.fi_extents_mapped)
+                 * If there is a delayed extent at this offset,
-                        break;
+                 * we will skip this extent.
+                 */
+                ext4_es_find_delayed_extent_range(inode, last, last, &es);
+                if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
+                        last = es.es_lblk + es.es_len;
+                        holeoff = (loff_t)last << blkbits;
+                        continue;
+                }
-                for (i = 0; i < fie.fi_extents_mapped; i++) {
+                /*
-                        next = (loff_t)(ext[i].fe_logical + ext[i].fe_length);
+                 * If there is an unwritten extent at this offset,
-                        /*
+                 * it is treated as data or as a hole, depending on
-                         * If extent is not unwritten, then it contains valid
+                 * whether the page cache has data for it.
-                         * data, mapped or delayed.
+                 */
-                         */
+                if (map.m_flags & EXT4_MAP_UNWRITTEN) {
-                        if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
+                        int unwritten;
-                                if (offset < (loff_t)ext[i].fe_logical)
+                        unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
-                                        goto out;
+                                                              &map, &holeoff);
-                                offset = next;
+                        if (!unwritten) {
+                                last += ret;
+                                holeoff = (loff_t)last << blkbits;
                                continue;
                        }
-                        /*
-                         * If there is a unwritten extent at this offset,
-                         * it will be as a data or a hole according to page
-                         * cache that has data or not.
-                         */
-                        if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
-                                                      next, &offset))
-                                goto out;
-                        offset = next;
-                        if (ext[i].fe_flags & FIEMAP_EXTENT_LAST)
-                                goto out;
                }
-        }
-        if (offset > inode->i_size)
+                /* found a hole */
-                offset = inode->i_size;
+                break;
-out:
+        } while (last <= end);
        mutex_unlock(&inode->i_mutex);
-        if (ret)
-                return ret;
-        return vfs_setpos(file, offset, maxsize);
+        if (holeoff > isize)
+                holeoff = isize;
+        return vfs_setpos(file, holeoff, maxsize);
 }
 /*
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index bf76f405a5f9..8a8ec6293b19 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -24,6 +24,18 @@ int ext4_resize_begin(struct super_block *sb)
                return -EPERM;
        /*
+         * If we are not using the primary superblock/GDT copy don't resize,
+         * because the user tools have no way of handling this.  Probably a
+         * bad time to do it anyways.
+         */
+        if (EXT4_SB(sb)->s_sbh->b_blocknr !=
+            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
+                ext4_warning(sb, "won't resize using backup superblock at %llu",
+                        (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
+                return -EPERM;
+        }
+        /*
         * We are not allowed to do online-resizing on a filesystem mounted
         * with error, because it can destroy the filesystem easily.
         */
@@ -758,18 +770,6 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
                       "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
                       gdb_num);
-        /*
-         * If we are not using the primary superblock/GDT copy don't resize,
-         * because the user tools have no way of handling this.  Probably a
-         * bad time to do it anyways.
-         */
-        if (EXT4_SB(sb)->s_sbh->b_blocknr !=
-            le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
-                ext4_warning(sb, "won't resize using backup superblock at %llu",
-                        (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
-                return -EPERM;
-        }
        gdb_bh = sb_bread(sb, gdblock);
        if (!gdb_bh)
                return -EIO;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 43c92b1685cb..74c5f53595fb 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3482,7 +3482,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
                                       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
            EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
-                ext4_warning(sb, KERN_INFO "metadata_csum and uninit_bg are "
+                ext4_warning(sb, "metadata_csum and uninit_bg are "
                             "redundant flags; please run fsck.");
        /* Check for a known checksum algorithm */
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 99d440a4a6ba..ee85cd4e136a 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -740,14 +740,15 @@ static int __init fcntl_init(void)
         * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
         * is defined as O_NONBLOCK on some platforms and not on others.
         */
-        BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+        BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
                O_RDONLY        | O_WRONLY      | O_RDWR        |
                O_CREAT         | O_EXCL        | O_NOCTTY      |
                O_TRUNC         | O_APPEND      | /* O_NONBLOCK | */
                __O_SYNC        | O_DSYNC       | FASYNC        |
                O_DIRECT        | O_LARGEFILE   | O_DIRECTORY   |
                O_NOFOLLOW      | O_NOATIME     | O_CLOEXEC     |
-                __FMODE_EXEC    | O_PATH        | __O_TMPFILE
+                __FMODE_EXEC    | O_PATH        | __O_TMPFILE   |
+                __FMODE_NONOTIFY
                ));
        fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ba1107977f2e..ed19a7d622fa 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -131,6 +131,13 @@ static void fuse_req_init_context(struct fuse_req *req)
        req->in.h.pid = current->pid;
 }
+/*
+ * Mark the connection as initialized by setting fc->initialized to 1.
+ *
+ * The write barrier is issued before the flag is stored.  The request
+ * allocation paths that test fc->initialized issue an smp_rmb() afterwards,
+ * annotated as matching this smp_wmb().
+ */
+void fuse_set_initialized(struct fuse_conn *fc)
+{
+        /* Make sure stores before this are seen on another CPU */
+        smp_wmb();
+        fc->initialized = 1;
+}
 static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
 {
        return !fc->initialized || (for_background && fc->blocked);
@@ -155,6 +162,8 @@ static struct fuse_req *__fuse_get_req(struct fuse_conn *fc, unsigned npages,
                if (intr)
                        goto out;
        }
+        /* Matches smp_wmb() in fuse_set_initialized() */
+        smp_rmb();
        err = -ENOTCONN;
        if (!fc->connected)
@@ -253,6 +262,8 @@ struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
        atomic_inc(&fc->num_waiting);
        wait_event(fc->blocked_waitq, fc->initialized);
+        /* Matches smp_wmb() in fuse_set_initialized() */
+        smp_rmb();
        req = fuse_request_alloc(0);
        if (!req)
                req = get_reserved_req(fc, file);
@@ -511,6 +522,39 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
 }
 EXPORT_SYMBOL_GPL(fuse_request_send);
+/*
+ * Override the size of the first input/output argument in @args when
+ * fc->minor is below the threshold for the request's opcode:
+ *
+ *   minor < 4:  FUSE_STATFS output uses FUSE_COMPAT_STATFS_SIZE
+ *   minor < 9:  LOOKUP/CREATE/MKNOD/MKDIR/SYMLINK/LINK output uses
+ *               FUSE_COMPAT_ENTRY_OUT_SIZE; GETATTR/SETATTR output uses
+ *               FUSE_COMPAT_ATTR_OUT_SIZE
+ *   minor < 12: CREATE input uses sizeof(struct fuse_open_in); MKNOD input
+ *               uses FUSE_COMPAT_MKNOD_IN_SIZE
+ *
+ * Only args->in.args[0].size and args->out.args[0].size are written; all
+ * other opcodes are left untouched.
+ */
+static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args)
+{
+        if (fc->minor < 4 && args->in.h.opcode == FUSE_STATFS)
+                args->out.args[0].size = FUSE_COMPAT_STATFS_SIZE;
+        if (fc->minor < 9) {
+                switch (args->in.h.opcode) {
+                case FUSE_LOOKUP:
+                case FUSE_CREATE:
+                case FUSE_MKNOD:
+                case FUSE_MKDIR:
+                case FUSE_SYMLINK:
+                case FUSE_LINK:
+                        args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+                        break;
+                case FUSE_GETATTR:
+                case FUSE_SETATTR:
+                        args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+                        break;
+                }
+        }
+        if (fc->minor < 12) {
+                switch (args->in.h.opcode) {
+                case FUSE_CREATE:
+                        args->in.args[0].size = sizeof(struct fuse_open_in);
+                        break;
+                case FUSE_MKNOD:
+                        args->in.args[0].size = FUSE_COMPAT_MKNOD_IN_SIZE;
+                        break;
+                }
+        }
+}
 ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
 {
        struct fuse_req *req;
@@ -520,6 +564,9 @@ ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
        if (IS_ERR(req))
                return PTR_ERR(req);
+        /* Needs to be done after fuse_get_req() so that fc->minor is valid */
+        fuse_adjust_compat(fc, args);
        req->in.h.opcode = args->in.h.opcode;
        req->in.h.nodeid = args->in.h.nodeid;
        req->in.numargs = args->in.numargs;
@@ -2127,7 +2174,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
        if (fc->connected) {
                fc->connected = 0;
                fc->blocked = 0;
-                fc->initialized = 1;
+                fuse_set_initialized(fc);
                end_io_requests(fc);
                end_queued_requests(fc);
                end_polls(fc);
@@ -2146,7 +2193,7 @@ int fuse_dev_release(struct inode *inode, struct file *file)
                spin_lock(&fc->lock);
                fc->connected = 0;
                fc->blocked = 0;
-                fc->initialized = 1;
+                fuse_set_initialized(fc);
                end_queued_requests(fc);
                end_polls(fc);
                wake_up_all(&fc->blocked_waitq);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 252b8a5de8b5..08e7b1a9d5d0 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -156,10 +156,7 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
        args->in.args[0].size = name->len + 1;
        args->in.args[0].value = name->name;
        args->out.numargs = 1;
-        if (fc->minor < 9)
+        args->out.args[0].size = sizeof(struct fuse_entry_out);
-                args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
-        else
-                args->out.args[0].size = sizeof(struct fuse_entry_out);
        args->out.args[0].value = outarg;
 }
@@ -422,16 +419,12 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
        args.in.h.opcode = FUSE_CREATE;
        args.in.h.nodeid = get_node_id(dir);
        args.in.numargs = 2;
-        args.in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
+        args.in.args[0].size = sizeof(inarg);
-                                                sizeof(inarg);
        args.in.args[0].value = &inarg;
        args.in.args[1].size = entry->d_name.len + 1;
        args.in.args[1].value = entry->d_name.name;
        args.out.numargs = 2;
-        if (fc->minor < 9)
+        args.out.args[0].size = sizeof(outentry);
-                args.out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
-        else
-                args.out.args[0].size = sizeof(outentry);
        args.out.args[0].value = &outentry;
        args.out.args[1].size = sizeof(outopen);
        args.out.args[1].value = &outopen;
@@ -539,10 +532,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
        memset(&outarg, 0, sizeof(outarg));
        args->in.h.nodeid = get_node_id(dir);
        args->out.numargs = 1;
-        if (fc->minor < 9)
+        args->out.args[0].size = sizeof(outarg);
-                args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
-        else
-                args->out.args[0].size = sizeof(outarg);
        args->out.args[0].value = &outarg;
        err = fuse_simple_request(fc, args);
        if (err)
@@ -592,8 +582,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
        inarg.umask = current_umask();
        args.in.h.opcode = FUSE_MKNOD;
        args.in.numargs = 2;
-        args.in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
+        args.in.args[0].size = sizeof(inarg);
-                                                sizeof(inarg);
        args.in.args[0].value = &inarg;
        args.in.args[1].size = entry->d_name.len + 1;
        args.in.args[1].value = entry->d_name.name;
@@ -899,10 +888,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
        args.in.args[0].size = sizeof(inarg);
        args.in.args[0].value = &inarg;
        args.out.numargs = 1;
-        if (fc->minor < 9)
+        args.out.args[0].size = sizeof(outarg);
-                args.out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
-        else
-                args.out.args[0].size = sizeof(outarg);
        args.out.args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (!err) {
@@ -1574,10 +1560,7 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
        args->in.args[0].size = sizeof(*inarg_p);
        args->in.args[0].value = inarg_p;
        args->out.numargs = 1;
-        if (fc->minor < 9)
+        args->out.args[0].size = sizeof(*outarg_p);
-                args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
-        else
-                args->out.args[0].size = sizeof(*outarg_p);
        args->out.args[0].value = outarg_p;
 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e0fc6725d1d0..1cdfb07c1376 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -906,4 +906,6 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc);
 int fuse_do_setattr(struct inode *inode, struct iattr *attr,
                    struct file *file);
+void fuse_set_initialized(struct fuse_conn *fc);
 #endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 6749109f255d..f38256e4476e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -424,8 +424,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
        args.in.h.opcode = FUSE_STATFS;
        args.in.h.nodeid = get_node_id(dentry->d_inode);
        args.out.numargs = 1;
-        args.out.args[0].size =
+        args.out.args[0].size = sizeof(outarg);
-                fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
        args.out.args[0].value = &outarg;
        err = fuse_simple_request(fc, &args);
        if (!err)
@@ -898,7 +897,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
                fc->max_write = max_t(unsigned, 4096, fc->max_write);
                fc->conn_init = 1;
        }
-        fc->initialized = 1;
+        fuse_set_initialized(fc);
        wake_up_all(&fc->blocked_waitq);
 }
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 37989f02a226..2d881b381d2b 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -201,10 +201,14 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns)
 static int kernfs_name_compare(unsigned int hash, const char *name,
                               const void *ns, const struct kernfs_node *kn)
 {
-        if (hash != kn->hash)
+        if (hash < kn->hash)
-                return hash - kn->hash;
+                return -1;
-        if (ns != kn->ns)
+        if (hash > kn->hash)
-                return ns - kn->ns;
+                return 1;
+        if (ns < kn->ns)
+                return -1;
+        if (ns > kn->ns)
+                return 1;
        return strcmp(name, kn->name);
 }
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index e94c887da2d7..55505cbe11af 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -138,10 +138,6 @@ lockd(void *vrqstp)
        dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
-        if (!nlm_timeout)
-                nlm_timeout = LOCKD_DFLT_TIMEO;
-        nlmsvc_timeout = nlm_timeout * HZ;
        /*
         * The main request loop. We don't terminate until the last
         * NFS mount or NFS daemon has gone away.
@@ -350,6 +346,10 @@ static struct svc_serv *lockd_create_svc(void)
                printk(KERN_WARNING
                        "lockd_up: no pid, %d users??\n", nlmsvc_users);
+        if (!nlm_timeout)
+                nlm_timeout = LOCKD_DFLT_TIMEO;
+        nlmsvc_timeout = nlm_timeout * HZ;
        serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, svc_rpcb_cleanup);
        if (!serv) {
                printk(KERN_WARNING "lockd_up: create service failed\n");
diff --git a/fs/locks.c b/fs/locks.c
index 735b8d3fa78c..59e2f905e4ff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1702,7 +1702,7 @@ static int generic_delete_lease(struct file *filp)
                        break;
        }
        trace_generic_delete_lease(inode, fl);
-        if (fl)
+        if (fl && IS_LEASE(fl))
                error = fl->fl_lmops->lm_change(before, F_UNLCK, &dispose);
        spin_unlock(&inode->i_lock);
        locks_dispose_list(&dispose);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 03311259b0c4..953daa44a282 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -228,6 +228,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
        kfree(clp->cl_serverowner);
        kfree(clp->cl_serverscope);
        kfree(clp->cl_implid);
+        kfree(clp->cl_owner_id);
 }
 void nfs4_free_client(struct nfs_client *clp)
@@ -452,6 +453,14 @@ static void nfs4_swap_callback_idents(struct nfs_client *keep,
        spin_unlock(&nn->nfs_client_lock);
 }
+/*
+ * Returns true if the cl_owner_id strings of both clients compare equal.
+ * If either client has a NULL cl_owner_id the pair is treated as matching.
+ */
+static bool nfs4_match_client_owner_id(const struct nfs_client *clp1,
+                const struct nfs_client *clp2)
+{
+        if (clp1->cl_owner_id == NULL || clp2->cl_owner_id == NULL)
+                return true;
+        return strcmp(clp1->cl_owner_id, clp2->cl_owner_id) == 0;
+}
 /**
 * nfs40_walk_client_list - Find server that recognizes a client ID
 *
@@ -483,9 +492,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
                if (pos->rpc_ops != new->rpc_ops)
                        continue;
-                if (pos->cl_proto != new->cl_proto)
-                        continue;
                if (pos->cl_minorversion != new->cl_minorversion)
                        continue;
@@ -510,6 +516,9 @@ int nfs40_walk_client_list(struct nfs_client *new,
                if (pos->cl_clientid != new->cl_clientid)
                        continue;
+                if (!nfs4_match_client_owner_id(pos, new))
+                        continue;
                atomic_inc(&pos->cl_count);
                spin_unlock(&nn->nfs_client_lock);
@@ -566,20 +575,14 @@ static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b)
 }
 /*
- * Returns true if the server owners match
+ * Returns true if the server major ids match
 */
 static bool
-nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b)
+nfs4_check_clientid_trunking(struct nfs_client *a, struct nfs_client *b)
 {
        struct nfs41_server_owner *o1 = a->cl_serverowner;
        struct nfs41_server_owner *o2 = b->cl_serverowner;
-        if (o1->minor_id != o2->minor_id) {
-                dprintk("NFS: --> %s server owner minor IDs do not match\n",
-                        __func__);
-                return false;
-        }
        if (o1->major_id_sz != o2->major_id_sz)
                goto out_major_mismatch;
        if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0)
@@ -621,9 +624,6 @@ int nfs41_walk_client_list(struct nfs_client *new,
                if (pos->rpc_ops != new->rpc_ops)
                        continue;
-                if (pos->cl_proto != new->cl_proto)
-                        continue;
                if (pos->cl_minorversion != new->cl_minorversion)
                        continue;
@@ -654,7 +654,19 @@ int nfs41_walk_client_list(struct nfs_client *new,
                if (!nfs4_match_clientids(pos, new))
                        continue;
-                if (!nfs4_match_serverowners(pos, new))
+                /*
+                 * Note that session trunking is just a special subcase of
+                 * client id trunking. In either case, we want to fall back
+                 * to using the existing nfs_client.
+                 */
+                if (!nfs4_check_clientid_trunking(pos, new))
+                        continue;
+                /* Unlike NFSv4.0, we know that NFSv4.1 always uses the
+                 * uniform string; however, someone might switch the
+                 * uniquifier string on us.
+                 */
+                if (!nfs4_match_client_owner_id(pos, new))
                        continue;
                atomic_inc(&pos->cl_count);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e7f8d5ff2581..c347705b0161 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1117,8 +1117,6 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
                return 0;
        if ((delegation->type & fmode) != fmode)
                return 0;
-        if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
-                return 0;
        if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
                return 0;
        nfs_mark_delegation_referenced(delegation);
@@ -4917,11 +4915,14 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp,
 }
 static unsigned int
-nfs4_init_nonuniform_client_string(const struct nfs_client *clp,
+nfs4_init_nonuniform_client_string(struct nfs_client *clp,
                                   char *buf, size_t len)
 {
        unsigned int result;
+        if (clp->cl_owner_id != NULL)
+                return strlcpy(buf, clp->cl_owner_id, len);
        rcu_read_lock();
        result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s",
                                clp->cl_ipaddr,
@@ -4930,24 +4931,32 @@ nfs4_init_nonuniform_client_string(const struct nfs_client *clp,
                                rpc_peeraddr2str(clp->cl_rpcclient,
                                                        RPC_DISPLAY_PROTO));
        rcu_read_unlock();
+        clp->cl_owner_id = kstrdup(buf, GFP_KERNEL);
        return result;
 }
 static unsigned int
-nfs4_init_uniform_client_string(const struct nfs_client *clp,
+nfs4_init_uniform_client_string(struct nfs_client *clp,
                                char *buf, size_t len)
 {
        const char *nodename = clp->cl_rpcclient->cl_nodename;
+        unsigned int result;
+        if (clp->cl_owner_id != NULL)
+                return strlcpy(buf, clp->cl_owner_id, len);
        if (nfs4_client_id_uniquifier[0] != '\0')
-                return scnprintf(buf, len, "Linux NFSv%u.%u %s/%s",
+                result = scnprintf(buf, len, "Linux NFSv%u.%u %s/%s",
                                clp->rpc_ops->version,
                                clp->cl_minorversion,
                                nfs4_client_id_uniquifier,
                                nodename);
-        return scnprintf(buf, len, "Linux NFSv%u.%u %s",
+        else
+                result = scnprintf(buf, len, "Linux NFSv%u.%u %s",
                                clp->rpc_ops->version, clp->cl_minorversion,
                                nodename);
+        clp->cl_owner_id = kstrdup(buf, GFP_KERNEL);
+        return result;
 }
 /*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3550a9c87616..c06a1ba80d73 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3897,11 +3897,11 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
                status = nfs4_setlease(dp);
                goto out;
        }
-        atomic_inc(&fp->fi_delegees);
        if (fp->fi_had_conflict) {
                status = -EAGAIN;
                goto out_unlock;
        }
+        atomic_inc(&fp->fi_delegees);
        hash_delegation_locked(dp, fp);
        status = 0;
 out_unlock:
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index c991616acca9..bff8567aa42d 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -259,16 +259,15 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
        struct fsnotify_event *kevent;
        char __user *start;
        int ret;
-        DEFINE_WAIT(wait);
+        DEFINE_WAIT_FUNC(wait, woken_wake_function);
        start = buf;
        group = file->private_data;
        pr_debug("%s: group=%p\n", __func__, group);
+        add_wait_queue(&group->notification_waitq, &wait);
        while (1) {
-                prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
                mutex_lock(&group->notification_mutex);
                kevent = get_one_event(group, count);
                mutex_unlock(&group->notification_mutex);
@@ -289,7 +288,8 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
                        if (start != buf)
                                break;
-                        schedule();
+                        wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
                        continue;
                }
@@ -318,8 +318,8 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
                buf += ret;
                count -= ret;
        }
+        remove_wait_queue(&group->notification_waitq, &wait);
-        finish_wait(&group->notification_waitq, &wait);
        if (start != buf && ret != -EFAULT)
                ret = buf - start;
        return ret;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 79b5af5e6a7b..cecd875653e4 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2023,11 +2023,8 @@ leave:
        dlm_lockres_drop_inflight_ref(dlm, res);
        spin_unlock(&res->spinlock);
-        if (ret < 0) {
+        if (ret < 0)
                mlog_errno(ret);
-                if (newlock)
-                        dlm_lock_put(newlock);
-        }
        return ret;
 }
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b931e04e3388..914c121ec890 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -94,6 +94,14 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
                                     struct inode *inode,
                                     const char *symname);
+static int ocfs2_double_lock(struct ocfs2_super *osb,
+                             struct buffer_head **bh1,
+                             struct inode *inode1,
+                             struct buffer_head **bh2,
+                             struct inode *inode2,
+                             int rename);
+static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2);
 /* An orphan dir name is an 8 byte value, printed as a hex string */
 #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
@@ -678,8 +686,10 @@ static int ocfs2_link(struct dentry *old_dentry,
 {
        handle_t *handle;
        struct inode *inode = old_dentry->d_inode;
+        struct inode *old_dir = old_dentry->d_parent->d_inode;
        int err;
        struct buffer_head *fe_bh = NULL;
+        struct buffer_head *old_dir_bh = NULL;
        struct buffer_head *parent_fe_bh = NULL;
        struct ocfs2_dinode *fe = NULL;
        struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
@@ -696,19 +706,33 @@ static int ocfs2_link(struct dentry *old_dentry,
        dquot_initialize(dir);
-        err = ocfs2_inode_lock_nested(dir, &parent_fe_bh, 1, OI_LS_PARENT);
+        err = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
+                        &parent_fe_bh, dir, 0);
        if (err < 0) {
                if (err != -ENOENT)
                        mlog_errno(err);
                return err;
        }
+        /* Make sure both dirs have bhs;
+         * get an extra ref on old_dir_bh if old == new. */
+        if (!parent_fe_bh) {
+                if (old_dir_bh) {
+                        parent_fe_bh = old_dir_bh;
+                        get_bh(parent_fe_bh);
+                } else {
+                        mlog(ML_ERROR, "%s: no old_dir_bh!\n", osb->uuid_str);
+                        err = -EIO;
+                        goto out;
+                }
+        }
        if (!dir->i_nlink) {
                err = -ENOENT;
                goto out;
        }
-        err = ocfs2_lookup_ino_from_name(dir, old_dentry->d_name.name,
+        err = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
                        old_dentry->d_name.len, &old_de_ino);
        if (err) {
                err = -ENOENT;
@@ -801,10 +825,11 @@ out_unlock_inode:
        ocfs2_inode_unlock(inode, 1);
 out:
-        ocfs2_inode_unlock(dir, 1);
+        ocfs2_double_unlock(old_dir, dir);
        brelse(fe_bh);
        brelse(parent_fe_bh);
+        brelse(old_dir_bh);
        ocfs2_free_dir_lookup_result(&lookup);
@@ -1072,14 +1097,15 @@ static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
 }
 /*
- * The only place this should be used is rename!
+ * The only places this should be used are rename and link!
 * if they have the same id, then the 1st one is the only one locked.
 */
 static int ocfs2_double_lock(struct ocfs2_super *osb,
                             struct buffer_head **bh1,
                             struct inode *inode1,
                             struct buffer_head **bh2,
-                             struct inode *inode2)
+                             struct inode *inode2,
+                             int rename)
 {
        int status;
        int inode1_is_ancestor, inode2_is_ancestor;
@@ -1127,7 +1153,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
                }
                /* lock id2 */
                status = ocfs2_inode_lock_nested(inode2, bh2, 1,
-                                                 OI_LS_RENAME1);
+                                rename == 1 ? OI_LS_RENAME1 : OI_LS_PARENT);
                if (status < 0) {
                        if (status != -ENOENT)
                                mlog_errno(status);
@@ -1136,7 +1162,8 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
        }
        /* lock id1 */
-        status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_RENAME2);
+        status = ocfs2_inode_lock_nested(inode1, bh1, 1,
+                        rename == 1 ?  OI_LS_RENAME2 : OI_LS_PARENT);
        if (status < 0) {
                /*
                 * An error return must mean that no cluster locks
@@ -1252,7 +1279,7 @@ static int ocfs2_rename(struct inode *old_dir,
        /* if old and new are the same, this'll just do one lock. */
        status = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
-                                   &new_dir_bh, new_dir);
+                                   &new_dir_bh, new_dir, 1);
        if (status < 0) {
                mlog_errno(status);
                goto bail;