58 files changed, 737 insertions, 603 deletions
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index d8d8e7ba6a1e..eb1cc92cd67d 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -110,6 +110,7 @@ struct autofs_sb_info {
        int sub_version;
        int min_proto;
        int max_proto;
+        int compat_daemon;
        unsigned long exp_timeout;
        unsigned int type;
        int reghost_enabled;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 76741d8d7786..85f1fcdb30e7 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -385,6 +385,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
                sbi->pipefd = pipefd;
                sbi->pipe = pipe;
                sbi->catatonic = 0;
+                sbi->compat_daemon = is_compat_task();
        }
 out:
        mutex_unlock(&sbi->wq_mutex);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 450f529a4eae..1feb68ecef95 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -124,6 +124,7 @@ start:
        /* Negative dentry - try next */
        if (!simple_positive(q)) {
                spin_unlock(&p->d_lock);
+                lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_);
                p = q;
                goto again;
        }
@@ -186,6 +187,7 @@ again:
        /* Negative dentry - try next */
        if (!simple_positive(ret)) {
                spin_unlock(&p->d_lock);
+                lock_set_subclass(&ret->d_lock.dep_map, 0, _RET_IP_);
                p = ret;
                goto again;
        }
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index e16980b00b8d..06858d955120 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -19,6 +19,7 @@
 #include <linux/parser.h>
 #include <linux/bitops.h>
 #include <linux/magic.h>
+#include <linux/compat.h>
 #include "autofs_i.h"
 #include <linux/module.h>
@@ -224,6 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
        set_autofs_type_indirect(&sbi->type);
        sbi->min_proto = 0;
        sbi->max_proto = 0;
+        sbi->compat_daemon = is_compat_task();
        mutex_init(&sbi->wq_mutex);
        mutex_init(&sbi->pipe_mutex);
        spin_lock_init(&sbi->fs_lock);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index da8876d38a7b..9c098db43344 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -91,7 +91,24 @@ static int autofs4_write(struct autofs_sb_info *sbi,
        return (bytes > 0);
 }
-        
+/*
+ * The autofs_v5 packet was misdesigned.
+ *
+ * The packets are identical on x86-32 and x86-64, but have different
+ * alignment, which means that 'sizeof()' will give different results.
+ * Fix it up for the case of running 32-bit user mode on a 64-bit kernel.
+ */
+static noinline size_t autofs_v5_packet_size(struct autofs_sb_info *sbi)
+{
+        size_t pktsz = sizeof(struct autofs_v5_packet);
+#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT)
+        if (sbi->compat_daemon > 0)
+                pktsz -= 4;
+#endif
+        return pktsz;
+}
 static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
                                 struct autofs_wait_queue *wq,
                                 int type)
@@ -155,8 +172,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
        {
                struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet;
-                pktsz = sizeof(*packet);
+                pktsz = autofs_v5_packet_size(sbi);
                packet->wait_queue_token = wq->wait_queue_token;
                packet->len = wq->name.len;
                memcpy(packet->name, wq->name.name, wq->name.len);
diff --git a/fs/bio.c b/fs/bio.c
index b1fe82cf88cf..b980ecde026a 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -505,13 +505,9 @@ EXPORT_SYMBOL(bio_clone);
 int bio_get_nr_vecs(struct block_device *bdev)
 {
        struct request_queue *q = bdev_get_queue(bdev);
-        int nr_pages;
+        return min_t(unsigned,
+                     queue_max_segments(q),
-        nr_pages = ((queue_max_sectors(q) << 9) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+                     queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1);
-        if (nr_pages > queue_max_segments(q))
-                nr_pages = queue_max_segments(q);
-        return nr_pages;
 }
 EXPORT_SYMBOL(bio_get_nr_vecs);
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 633c701a287d..98f6bf10bbd4 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -892,6 +892,8 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
                if (eb != eb_in)
                        free_extent_buffer(eb);
                ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
+                if (ret > 0)
+                        ret = -ENOENT;
                if (ret)
                        break;
                next_inum = found_key.offset;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index b669a7d8e499..d986824bb2b4 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -644,7 +644,7 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
 static int btrfsic_process_superblock(struct btrfsic_state *state,
                                      struct btrfs_fs_devices *fs_devices)
 {
-        int ret;
+        int ret = 0;
        struct btrfs_super_block *selected_super;
        struct list_head *dev_head = &fs_devices->devices;
        struct btrfs_device *device;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 14f1c5a0b2d2..d02c27cd14c7 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -588,6 +588,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                                   page_offset(bio->bi_io_vec->bv_page),
                                   PAGE_CACHE_SIZE);
        read_unlock(&em_tree->lock);
+        if (!em)
+                return -EIO;
        compressed_len = em->block_len;
        cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 27ebe61d3ccc..80b6486fd5e6 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -886,7 +886,7 @@ struct btrfs_block_rsv {
        u64 reserved;
        struct btrfs_space_info *space_info;
        spinlock_t lock;
-        unsigned int full:1;
+        unsigned int full;
 };
 /*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 811d9f918b1c..534266fe505f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2260,6 +2260,12 @@ int open_ctree(struct super_block *sb,
                goto fail_sb_buffer;
        }
+        if (sectorsize < PAGE_SIZE) {
+                printk(KERN_WARNING "btrfs: Incompatible sector size "
+                       "found on %s\n", sb->s_id);
+                goto fail_sb_buffer;
+        }
        mutex_lock(&fs_info->chunk_mutex);
        ret = btrfs_read_sys_array(tree_root);
        mutex_unlock(&fs_info->chunk_mutex);
@@ -2301,6 +2307,12 @@ int open_ctree(struct super_block *sb,
        btrfs_close_extra_devices(fs_devices);
+        if (!fs_devices->latest_bdev) {
+                printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
+                       sb->s_id);
+                goto fail_tree_roots;
+        }
 retry_root_backup:
        blocksize = btrfs_level_size(tree_root,
                                     btrfs_super_root_level(disk_super));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 283af7a676a3..37e0a800d34e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3312,7 +3312,8 @@ commit_trans:
        }
        data_sinfo->bytes_may_use += bytes;
        trace_btrfs_space_reservation(root->fs_info, "space_info",
-                                      (u64)data_sinfo, bytes, 1);
+                                      (u64)(unsigned long)data_sinfo,
+                                      bytes, 1);
        spin_unlock(&data_sinfo->lock);
        return 0;
@@ -3333,7 +3334,8 @@ void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes)
        spin_lock(&data_sinfo->lock);
        data_sinfo->bytes_may_use -= bytes;
        trace_btrfs_space_reservation(root->fs_info, "space_info",
-                                      (u64)data_sinfo, bytes, 0);
+                                      (u64)(unsigned long)data_sinfo,
+                                      bytes, 0);
        spin_unlock(&data_sinfo->lock);
 }
@@ -3611,12 +3613,15 @@ static int may_commit_transaction(struct btrfs_root *root,
        if (space_info != delayed_rsv->space_info)
                return -ENOSPC;
+        spin_lock(&space_info->lock);
        spin_lock(&delayed_rsv->lock);
-        if (delayed_rsv->size < bytes) {
+        if (space_info->bytes_pinned + delayed_rsv->size < bytes) {
                spin_unlock(&delayed_rsv->lock);
+                spin_unlock(&space_info->lock);
                return -ENOSPC;
        }
        spin_unlock(&delayed_rsv->lock);
+        spin_unlock(&space_info->lock);
 commit:
        trans = btrfs_join_transaction(root);
@@ -3695,9 +3700,9 @@ again:
                if (used + orig_bytes <= space_info->total_bytes) {
                        space_info->bytes_may_use += orig_bytes;
                        trace_btrfs_space_reservation(root->fs_info,
-                                                      "space_info",
+                                              "space_info",
-                                                      (u64)space_info,
+                                              (u64)(unsigned long)space_info,
-                                                      orig_bytes, 1);
+                                              orig_bytes, 1);
                        ret = 0;
                } else {
                        /*
@@ -3766,9 +3771,9 @@ again:
                if (used + num_bytes < space_info->total_bytes + avail) {
                        space_info->bytes_may_use += orig_bytes;
                        trace_btrfs_space_reservation(root->fs_info,
-                                                      "space_info",
+                                              "space_info",
-                                                      (u64)space_info,
+                                              (u64)(unsigned long)space_info,
-                                                      orig_bytes, 1);
+                                              orig_bytes, 1);
                        ret = 0;
                } else {
                        wait_ordered = true;
@@ -3913,8 +3918,8 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
                        spin_lock(&space_info->lock);
                        space_info->bytes_may_use -= num_bytes;
                        trace_btrfs_space_reservation(fs_info, "space_info",
-                                                      (u64)space_info,
+                                              (u64)(unsigned long)space_info,
-                                                      num_bytes, 0);
+                                              num_bytes, 0);
                        space_info->reservation_progress++;
                        spin_unlock(&space_info->lock);
                }
@@ -4105,7 +4110,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
        num_bytes += div64_u64(data_used + meta_used, 50);
        if (num_bytes * 3 > meta_used)
-                num_bytes = div64_u64(meta_used, 3);
+                num_bytes = div64_u64(meta_used, 3) * 2;
        return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10);
 }
@@ -4132,14 +4137,14 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
                block_rsv->reserved += num_bytes;
                sinfo->bytes_may_use += num_bytes;
                trace_btrfs_space_reservation(fs_info, "space_info",
-                                              (u64)sinfo, num_bytes, 1);
+                                      (u64)(unsigned long)sinfo, num_bytes, 1);
        }
        if (block_rsv->reserved >= block_rsv->size) {
                num_bytes = block_rsv->reserved - block_rsv->size;
                sinfo->bytes_may_use -= num_bytes;
                trace_btrfs_space_reservation(fs_info, "space_info",
-                                              (u64)sinfo, num_bytes, 0);
+                                      (u64)(unsigned long)sinfo, num_bytes, 0);
                sinfo->reservation_progress++;
                block_rsv->reserved = block_rsv->size;
                block_rsv->full = 1;
@@ -4192,7 +4197,8 @@ void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
        if (!trans->bytes_reserved)
                return;
-        trace_btrfs_space_reservation(root->fs_info, "transaction", (u64)trans,
+        trace_btrfs_space_reservation(root->fs_info, "transaction",
+                                      (u64)(unsigned long)trans,
                                      trans->bytes_reserved, 0);
        btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
        trans->bytes_reserved = 0;
@@ -4710,9 +4716,9 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
                        space_info->bytes_reserved += num_bytes;
                        if (reserve == RESERVE_ALLOC) {
                                trace_btrfs_space_reservation(cache->fs_info,
-                                                              "space_info",
+                                              "space_info",
-                                                              (u64)space_info,
+                                              (u64)(unsigned long)space_info,
-                                                              num_bytes, 0);
+                                              num_bytes, 0);
                                space_info->bytes_may_use -= num_bytes;
                        }
                }
@@ -7886,9 +7892,16 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
        u64 start;
        u64 end;
        u64 trimmed = 0;
+        u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
        int ret = 0;
-        cache = btrfs_lookup_block_group(fs_info, range->start);
+        /*
+         * Try to trim all FS space; our block group may start from a non-zero offset.
+         */
+        if (range->len == total_bytes)
+                cache = btrfs_lookup_first_block_group(fs_info, range->start);
+        else
+                cache = btrfs_lookup_block_group(fs_info, range->start);
        while (cache) {
                if (cache->key.objectid >= (range->start + range->len)) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fcf77e1ded40..a55fbe6252de 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -513,6 +513,15 @@ hit_next:
        WARN_ON(state->end < start);
        last_end = state->end;
+        if (state->end < end && !need_resched())
+                next_node = rb_next(&state->rb_node);
+        else
+                next_node = NULL;
+        /* the state doesn't have the wanted bits, skip to the next one */
+        if (!(state->state & bits))
+                goto next;
        /*
         *     | ---- desired range ---- |
         *  | state | or
@@ -565,20 +574,15 @@ hit_next:
                goto out;
        }
-        if (state->end < end && prealloc && !need_resched())
-                next_node = rb_next(&state->rb_node);
-        else
-                next_node = NULL;
        set |= clear_state_bit(tree, state, &bits, wake);
+next:
        if (last_end == (u64)-1)
                goto out;
        start = last_end + 1;
        if (start <= end && next_node) {
                state = rb_entry(next_node, struct extent_state,
                                 rb_node);
-                if (state->start == start)
+                goto hit_next;
-                        goto hit_next;
        }
        goto search_again;
@@ -961,8 +965,6 @@ hit_next:
                set_state_bits(tree, state, &bits);
                clear_state_bit(tree, state, &clear_bits, 0);
-                merge_state(tree, state);
                if (last_end == (u64)-1)
                        goto out;
@@ -1007,7 +1009,6 @@ hit_next:
                if (state->end <= end) {
                        set_state_bits(tree, state, &bits);
                        clear_state_bit(tree, state, &clear_bits, 0);
-                        merge_state(tree, state);
                        if (last_end == (u64)-1)
                                goto out;
                        start = last_end + 1;
@@ -1068,8 +1069,6 @@ hit_next:
                set_state_bits(tree, prealloc, &bits);
                clear_state_bit(tree, prealloc, &clear_bits, 0);
-                merge_state(tree, prealloc);
                prealloc = NULL;
                goto out;
        }
@@ -2154,13 +2153,46 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
                 "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode,
                 failrec->this_mirror, num_copies, failrec->in_validation);
-        tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror,
+        ret = tree->ops->submit_bio_hook(inode, read_mode, bio,
-                                        failrec->bio_flags, 0);
+                                         failrec->this_mirror,
-        return 0;
+                                         failrec->bio_flags, 0);
+        return ret;
 }
 /* lots and lots of room for performance fixes in the end_bio funcs */
+int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
+{
+        int uptodate = (err == 0);
+        struct extent_io_tree *tree;
+        int ret;
+        tree = &BTRFS_I(page->mapping->host)->io_tree;
+        if (tree->ops && tree->ops->writepage_end_io_hook) {
+                ret = tree->ops->writepage_end_io_hook(page, start,
+                                               end, NULL, uptodate);
+                if (ret)
+                        uptodate = 0;
+        }
+        if (!uptodate && tree->ops &&
+            tree->ops->writepage_io_failed_hook) {
+                ret = tree->ops->writepage_io_failed_hook(NULL, page,
+                                                 start, end, NULL);
+                /* Writeback already completed */
+                if (ret == 0)
+                        return 1;
+        }
+        if (!uptodate) {
+                clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
+                ClearPageUptodate(page);
+                SetPageError(page);
+        }
+        return 0;
+}
 /*
 * after a writepage IO is done, we need to:
 * clear the uptodate bits on error
@@ -2172,13 +2204,11 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 */
 static void end_bio_extent_writepage(struct bio *bio, int err)
 {
-        int uptodate = err == 0;
        struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
        struct extent_io_tree *tree;
        u64 start;
        u64 end;
        int whole_page;
-        int ret;
        do {
                struct page *page = bvec->bv_page;
@@ -2195,28 +2225,9 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
                if (--bvec >= bio->bi_io_vec)
                        prefetchw(&bvec->bv_page->flags);
-                if (tree->ops && tree->ops->writepage_end_io_hook) {
-                        ret = tree->ops->writepage_end_io_hook(page, start,
-                                                       end, NULL, uptodate);
-                        if (ret)
-                                uptodate = 0;
-                }
-                if (!uptodate && tree->ops &&
-                    tree->ops->writepage_io_failed_hook) {
-                        ret = tree->ops->writepage_io_failed_hook(bio, page,
-                                                         start, end, NULL);
-                        if (ret == 0) {
-                                uptodate = (err == 0);
-                                continue;
-                        }
-                }
-                if (!uptodate) {
+                if (end_extent_writepage(page, err, start, end))
-                        clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
+                        continue;
-                        ClearPageUptodate(page);
-                        SetPageError(page);
-                }
                if (whole_page)
                        end_page_writeback(page);
@@ -2779,9 +2790,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                                delalloc_start = delalloc_end + 1;
                                continue;
                        }
-                        tree->ops->fill_delalloc(inode, page, delalloc_start,
+                        ret = tree->ops->fill_delalloc(inode, page,
-                                                 delalloc_end, &page_started,
+                                                       delalloc_start,
-                                                 &nr_written);
+                                                       delalloc_end,
+                                                       &page_started,
+                                                       &nr_written);
+                        BUG_ON(ret);
                        /*
                         * delalloc_end is already one less than the total
                         * length, so we don't subtract one from
@@ -2818,8 +2832,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
        if (tree->ops && tree->ops->writepage_start_hook) {
                ret = tree->ops->writepage_start_hook(page, start,
                                                      page_end);
-                if (ret == -EAGAIN) {
+                if (ret) {
-                        redirty_page_for_writepage(wbc, page);
+                        /* Fixup worker will requeue */
+                        if (ret == -EBUSY)
+                                wbc->pages_skipped++;
+                        else
+                                redirty_page_for_writepage(wbc, page);
                        update_nr_written(page, wbc, nr_written);
                        unlock_page(page);
                        ret = 0;
@@ -3289,7 +3307,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
                        len = end - start + 1;
                        write_lock(&map->lock);
                        em = lookup_extent_mapping(map, start, len);
-                        if (IS_ERR_OR_NULL(em)) {
+                        if (!em) {
                                write_unlock(&map->lock);
                                break;
                        }
@@ -3853,10 +3871,9 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
        num_pages = num_extent_pages(eb->start, eb->len);
        clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-        if (eb_straddles_pages(eb)) {
+        clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
-                clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
+                              cached_state, GFP_NOFS);
-                                      cached_state, GFP_NOFS);
-        }
        for (i = 0; i < num_pages; i++) {
                page = extent_buffer_page(eb, i);
                if (page)
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index bc6a042cb6fc..cecc3518c121 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -319,4 +319,5 @@ struct btrfs_mapping_tree;
 int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
                        u64 length, u64 logical, struct page *page,
                        int mirror_num);
+int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
 #endif
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 33a7890b1f40..1195f09761fe 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -26,8 +26,8 @@ struct extent_map {
        unsigned long flags;
        struct block_device *bdev;
        atomic_t refs;
-        unsigned int in_tree:1;
+        unsigned int in_tree;
-        unsigned int compress_type:4;
+        unsigned int compress_type;
 };
 struct extent_map_tree {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 859ba2dd8890..e8d06b6b9194 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1605,6 +1605,14 @@ static long btrfs_fallocate(struct file *file, int mode,
                return -EOPNOTSUPP;
        /*
+         * Make sure we have enough space before we do the
+         * allocation.
+         */
+        ret = btrfs_check_data_free_space(inode, len);
+        if (ret)
+                return ret;
+        /*
         * wait for ordered IO before we have any locks.  We'll loop again
         * below with the locks held.
         */
@@ -1667,27 +1675,12 @@ static long btrfs_fallocate(struct file *file, int mode,
                if (em->block_start == EXTENT_MAP_HOLE ||
                    (cur_offset >= inode->i_size &&
                     !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
-                        /*
-                         * Make sure we have enough space before we do the
-                         * allocation.
-                         */
-                        ret = btrfs_check_data_free_space(inode, last_byte -
-                                                          cur_offset);
-                        if (ret) {
-                                free_extent_map(em);
-                                break;
-                        }
                        ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
                                                        last_byte - cur_offset,
                                                        1 << inode->i_blkbits,
                                                        offset + len,
                                                        &alloc_hint);
-                        /* Let go of our reservation. */
-                        btrfs_free_reserved_data_space(inode, last_byte -
-                                                       cur_offset);
                        if (ret < 0) {
                                free_extent_map(em);
                                break;
@@ -1715,6 +1708,8 @@ static long btrfs_fallocate(struct file *file, int mode,
                             &cached_state, GFP_NOFS);
 out:
        mutex_unlock(&inode->i_mutex);
+        /* Let go of our reservation. */
+        btrfs_free_reserved_data_space(inode, len);
        return ret;
 }
@@ -1761,7 +1756,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
                                                     start - root->sectorsize,
                                                     root->sectorsize, 0);
                if (IS_ERR(em)) {
-                        ret = -ENXIO;
+                        ret = PTR_ERR(em);
                        goto out;
                }
                last_end = em->start + em->len;
@@ -1773,7 +1768,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
        while (1) {
                em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
                if (IS_ERR(em)) {
-                        ret = -ENXIO;
+                        ret = PTR_ERR(em);
                        break;
                }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index c2f20594c9f7..710ea380c7ed 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -777,6 +777,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
        spin_lock(&block_group->lock);
        if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
                spin_unlock(&block_group->lock);
+                btrfs_free_path(path);
                goto out;
        }
        spin_unlock(&block_group->lock);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index 213ffa86ce1b..ee15d88b33d2 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -438,7 +438,8 @@ int btrfs_save_ino_cache(struct btrfs_root *root,
                                          trans->bytes_reserved);
        if (ret)
                goto out;
-        trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
+        trace_btrfs_space_reservation(root->fs_info, "ino_cache",
+                                      (u64)(unsigned long)trans,
                                      trans->bytes_reserved, 1);
 again:
        inode = lookup_free_ino_inode(root, path);
@@ -500,7 +501,8 @@ again:
 out_put:
        iput(inode);
 out_release:
-        trace_btrfs_space_reservation(root->fs_info, "ino_cache", (u64)trans,
+        trace_btrfs_space_reservation(root->fs_info, "ino_cache",
+                                      (u64)(unsigned long)trans,
                                      trans->bytes_reserved, 0);
        btrfs_block_rsv_release(root, trans->block_rsv, trans->bytes_reserved);
 out:
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 32214fe0f7e3..892b34785ccc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1555,6 +1555,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
        struct inode *inode;
        u64 page_start;
        u64 page_end;
+        int ret;
        fixup = container_of(work, struct btrfs_writepage_fixup, work);
        page = fixup->page;
@@ -1582,12 +1583,21 @@ again:
                                     page_end, &cached_state, GFP_NOFS);
                unlock_page(page);
                btrfs_start_ordered_extent(inode, ordered, 1);
+                btrfs_put_ordered_extent(ordered);
                goto again;
        }
-        BUG();
+        ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+        if (ret) {
+                mapping_set_error(page->mapping, ret);
+                end_extent_writepage(page, ret, page_start, page_end);
+                ClearPageChecked(page);
+                goto out;
+         }
        btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
        ClearPageChecked(page);
+        set_page_dirty(page);
 out:
        unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
                             &cached_state, GFP_NOFS);
@@ -1630,7 +1640,7 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
        fixup->work.func = btrfs_writepage_fixup_worker;
        fixup->page = page;
        btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work);
-        return -EAGAIN;
+        return -EBUSY;
 }
 static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
@@ -4575,7 +4585,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
                ret = btrfs_insert_dir_item(trans, root, name, name_len,
                                            parent_inode, &key,
                                            btrfs_inode_type(inode), index);
-                BUG_ON(ret);
+                if (ret)
+                        goto fail_dir_item;
                btrfs_i_size_write(parent_inode, parent_inode->i_size +
                                   name_len * 2);
@@ -4583,6 +4594,23 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
                ret = btrfs_update_inode(trans, root, parent_inode);
        }
        return ret;
+fail_dir_item:
+        if (unlikely(ino == BTRFS_FIRST_FREE_OBJECTID)) {
+                u64 local_index;
+                int err;
+                err = btrfs_del_root_ref(trans, root->fs_info->tree_root,
+                                 key.objectid, root->root_key.objectid,
+                                 parent_ino, &local_index, name, name_len);
+        } else if (add_backref) {
+                u64 local_index;
+                int err;
+                err = btrfs_del_inode_ref(trans, root, name, name_len,
+                                          ino, parent_ino, &local_index);
+        }
+        return ret;
 }
 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
@@ -6696,8 +6724,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
        int err;
        u64 index = 0;
-        inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid,
+        inode = btrfs_new_inode(trans, new_root, NULL, "..", 2,
-                                new_dirid, S_IFDIR | 0700, &index);
+                                new_dirid, new_dirid,
+                                S_IFDIR | (~current_umask() & S_IRWXUGO),
+                                &index);
        if (IS_ERR(inode))
                return PTR_ERR(inode);
        inode->i_op = &btrfs_dir_inode_operations;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 03bb62a9ee24..d8b54715c2de 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -861,6 +861,7 @@ static int cluster_pages_for_defrag(struct inode *inode,
        int i_done;
        struct btrfs_ordered_extent *ordered;
        struct extent_state *cached_state = NULL;
+        struct extent_io_tree *tree;
        gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
        if (isize == 0)
@@ -871,18 +872,34 @@ static int cluster_pages_for_defrag(struct inode *inode,
                                           num_pages << PAGE_CACHE_SHIFT);
        if (ret)
                return ret;
-again:
-        ret = 0;
        i_done = 0;
+        tree = &BTRFS_I(inode)->io_tree;
        /* step one, lock all the pages */
        for (i = 0; i < num_pages; i++) {
                struct page *page;
+again:
                page = find_or_create_page(inode->i_mapping,
-                                            start_index + i, mask);
+                                           start_index + i, mask);
                if (!page)
                        break;
+                page_start = page_offset(page);
+                page_end = page_start + PAGE_CACHE_SIZE - 1;
+                while (1) {
+                        lock_extent(tree, page_start, page_end, GFP_NOFS);
+                        ordered = btrfs_lookup_ordered_extent(inode,
+                                                              page_start);
+                        unlock_extent(tree, page_start, page_end, GFP_NOFS);
+                        if (!ordered)
+                                break;
+                        unlock_page(page);
+                        btrfs_start_ordered_extent(inode, ordered, 1);
+                        btrfs_put_ordered_extent(ordered);
+                        lock_page(page);
+                }
                if (!PageUptodate(page)) {
                        btrfs_readpage(NULL, page);
                        lock_page(page);
@@ -893,15 +910,22 @@ again:
                                break;
                        }
                }
                isize = i_size_read(inode);
                file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
-                if (!isize || page->index > file_end ||
+                if (!isize || page->index > file_end) {
-                    page->mapping != inode->i_mapping) {
                        /* whoops, we blew past eof, skip this page */
                        unlock_page(page);
                        page_cache_release(page);
                        break;
                }
+                if (page->mapping != inode->i_mapping) {
+                        unlock_page(page);
+                        page_cache_release(page);
+                        goto again;
+                }
                pages[i] = page;
                i_done++;
        }
@@ -924,25 +948,6 @@ again:
        lock_extent_bits(&BTRFS_I(inode)->io_tree,
                         page_start, page_end - 1, 0, &cached_state,
                         GFP_NOFS);
-        ordered = btrfs_lookup_first_ordered_extent(inode, page_end - 1);
-        if (ordered &&
-            ordered->file_offset + ordered->len > page_start &&
-            ordered->file_offset < page_end) {
-                btrfs_put_ordered_extent(ordered);
-                unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-                                     page_start, page_end - 1,
-                                     &cached_state, GFP_NOFS);
-                for (i = 0; i < i_done; i++) {
-                        unlock_page(pages[i]);
-                        page_cache_release(pages[i]);
-                }
-                btrfs_wait_ordered_range(inode, page_start,
-                                         page_end - page_start);
-                goto again;
-        }
-        if (ordered)
-                btrfs_put_ordered_extent(ordered);
        clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
                          page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
                          EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
@@ -1327,6 +1332,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
                goto out;
        }
+        if (name[0] == '.' &&
+           (namelen == 1 || (name[1] == '.' && namelen == 2))) {
+                ret = -EEXIST;
+                goto out;
+        }
        if (subvol) {
                ret = btrfs_mksubvol(&file->f_path, name, namelen,
                                     NULL, transid, readonly);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 9770cc5bfb76..abc0fbffa510 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1367,7 +1367,8 @@ out:
 }
 static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
-        u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length)
+        u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length,
+        u64 dev_offset)
 {
        struct btrfs_mapping_tree *map_tree =
                &sdev->dev->dev_root->fs_info->mapping_tree;
@@ -1391,7 +1392,8 @@ static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev,
                goto out;
        for (i = 0; i < map->num_stripes; ++i) {
-                if (map->stripes[i].dev == sdev->dev) {
+                if (map->stripes[i].dev == sdev->dev &&
+                    map->stripes[i].physical == dev_offset) {
                        ret = scrub_stripe(sdev, map, i, chunk_offset, length);
                        if (ret)
                                goto out;
@@ -1487,7 +1489,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end)
                        break;
                }
                ret = scrub_chunk(sdev, chunk_tree, chunk_objectid,
-                                  chunk_offset, length);
+                                  chunk_offset, length, found_key.offset);
                btrfs_put_block_group(cache);
                if (ret)
                        break;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 287a6728b1ad..04b77e3ceb7a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -327,7 +327,8 @@ again:
        if (num_bytes) {
                trace_btrfs_space_reservation(root->fs_info, "transaction",
-                                              (u64)h, num_bytes, 1);
+                                              (u64)(unsigned long)h,
+                                              num_bytes, 1);
                h->block_rsv = &root->fs_info->trans_block_rsv;
                h->bytes_reserved = num_bytes;
        }
@@ -915,7 +916,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                dentry->d_name.name, dentry->d_name.len,
                                parent_inode, &key,
                                BTRFS_FT_DIR, index);
-        BUG_ON(ret);
+        if (ret) {
+                pending->error = -EEXIST;
+                dput(parent);
+                goto fail;
+        }
        btrfs_i_size_write(parent_inode, parent_inode->i_size +
                                         dentry->d_name.len * 2);
@@ -993,12 +998,9 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
 {
        struct btrfs_pending_snapshot *pending;
        struct list_head *head = &trans->transaction->pending_snapshots;
-        int ret;
-        list_for_each_entry(pending, head, list) {
+        list_for_each_entry(pending, head, list)
-                ret = create_pending_snapshot(trans, fs_info, pending);
+                create_pending_snapshot(trans, fs_info, pending);
-                BUG_ON(ret);
-        }
        return 0;
 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0b4e2af7954d..ef41f285a475 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -459,12 +459,23 @@ int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
 {
        struct btrfs_device *device, *next;
+        struct block_device *latest_bdev = NULL;
+        u64 latest_devid = 0;
+        u64 latest_transid = 0;
        mutex_lock(&uuid_mutex);
 again:
        /* This is the initialized path, it is safe to release the devices. */
        list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
-                if (device->in_fs_metadata)
+                if (device->in_fs_metadata) {
+                        if (!latest_transid ||
+                            device->generation > latest_transid) {
+                                latest_devid = device->devid;
+                                latest_transid = device->generation;
+                                latest_bdev = device->bdev;
+                        }
                        continue;
+                }
                if (device->bdev) {
                        blkdev_put(device->bdev, device->mode);
@@ -487,6 +498,10 @@ again:
                goto again;
        }
+        fs_devices->latest_bdev = latest_bdev;
+        fs_devices->latest_devid = latest_devid;
+        fs_devices->latest_trans = latest_transid;
        mutex_unlock(&uuid_mutex);
        return 0;
 }
@@ -1953,7 +1968,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root,
        em = lookup_extent_mapping(em_tree, chunk_offset, 1);
        read_unlock(&em_tree->lock);
-        BUG_ON(em->start > chunk_offset ||
+        BUG_ON(!em || em->start > chunk_offset ||
               em->start + em->len < chunk_offset);
        map = (struct map_lookup *)em->bdev;
@@ -4356,6 +4371,20 @@ int btrfs_read_sys_array(struct btrfs_root *root)
                return -ENOMEM;
        btrfs_set_buffer_uptodate(sb);
        btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
+        /*
+         * The sb extent buffer is artificial and just used to read the system array.
+         * btrfs_set_buffer_uptodate() call does not properly mark all its
+         * pages up-to-date when the page is larger: extent does not cover the
+         * whole page and consequently check_page_uptodate does not find all
+         * the page's extents up-to-date (the hole beyond sb),
+         * write_extent_buffer then triggers a WARN_ON.
+         *
+         * Regular short extents go through mark_extent_buffer_dirty/writeback cycle,
+         * but sb spans only this function. Add an explicit SetPageUptodate call
+         * to silence the warning, e.g. on PowerPC 64.
+         */
+        if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
+                SetPageUptodate(sb->first_page);
        write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
        array_size = btrfs_super_sys_array_size(super_copy);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 9c288653e6d6..602f77c304c9 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -773,10 +773,11 @@ standard_receive3(struct TCP_Server_Info *server, struct mid_q_entry *mid)
                cifs_dump_mem("Bad SMB: ", buf,
                        min_t(unsigned int, server->total_read, 48));
-        if (mid)
+        if (!mid)
-                handle_mid(mid, server, smb_buffer, length);
+                return length;
-        return length;
+        handle_mid(mid, server, smb_buffer, length);
+        return 0;
 }
 static int
@@ -2125,7 +2126,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
        down_read(&key->sem);
        upayload = key->payload.data;
        if (IS_ERR_OR_NULL(upayload)) {
-                rc = PTR_ERR(key);
+                rc = upayload ? PTR_ERR(upayload) : -EINVAL;
                goto out_key_put;
        }
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index df8fecb5b993..63a196b97d50 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -492,7 +492,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 {
        int xid;
        int rc = 0; /* to get around spurious gcc warning, set to zero here */
-        __u32 oplock = 0;
+        __u32 oplock = enable_oplocks ? REQ_OPLOCK : 0;
        __u16 fileHandle = 0;
        bool posix_open = false;
        struct cifs_sb_info *cifs_sb;
diff --git a/fs/compat.c b/fs/compat.c
index fa9d721ecfee..07880bae28a9 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -131,41 +131,35 @@ asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_tim
 static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
 {
-        compat_ino_t ino = stat->ino;
+        struct compat_stat tmp;
-        typeof(ubuf->st_uid) uid = 0;
-        typeof(ubuf->st_gid) gid = 0;
-        int err;
-        SET_UID(uid, stat->uid);
+        if (!old_valid_dev(stat->dev) || !old_valid_dev(stat->rdev))
-        SET_GID(gid, stat->gid);
+                return -EOVERFLOW;
-        if ((u64) stat->size > MAX_NON_LFS ||
+        memset(&tmp, 0, sizeof(tmp));
-            !old_valid_dev(stat->dev) ||
+        tmp.st_dev = old_encode_dev(stat->dev);
-            !old_valid_dev(stat->rdev))
+        tmp.st_ino = stat->ino;
+        if (sizeof(tmp.st_ino) < sizeof(stat->ino) && tmp.st_ino != stat->ino)
                return -EOVERFLOW;
-        if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino)
+        tmp.st_mode = stat->mode;
+        tmp.st_nlink = stat->nlink;
+        if (tmp.st_nlink != stat->nlink)
                return -EOVERFLOW;
+        SET_UID(tmp.st_uid, stat->uid);
-        if (clear_user(ubuf, sizeof(*ubuf)))
+        SET_GID(tmp.st_gid, stat->gid);
-                return -EFAULT;
+        tmp.st_rdev = old_encode_dev(stat->rdev);
+        if ((u64) stat->size > MAX_NON_LFS)
-        err  = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
+                return -EOVERFLOW;
-        err |= __put_user(ino, &ubuf->st_ino);
+        tmp.st_size = stat->size;
-        err |= __put_user(stat->mode, &ubuf->st_mode);
+        tmp.st_atime = stat->atime.tv_sec;
-        err |= __put_user(stat->nlink, &ubuf->st_nlink);
+        tmp.st_atime_nsec = stat->atime.tv_nsec;
-        err |= __put_user(uid, &ubuf->st_uid);
+        tmp.st_mtime = stat->mtime.tv_sec;
-        err |= __put_user(gid, &ubuf->st_gid);
+        tmp.st_mtime_nsec = stat->mtime.tv_nsec;
-        err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
+        tmp.st_ctime = stat->ctime.tv_sec;
-        err |= __put_user(stat->size, &ubuf->st_size);
+        tmp.st_ctime_nsec = stat->ctime.tv_nsec;
-        err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
+        tmp.st_blocks = stat->blocks;
-        err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
+        tmp.st_blksize = stat->blksize;
-        err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
+        return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;
-        err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
-        err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
-        err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
-        err |= __put_user(stat->blksize, &ubuf->st_blksize);
-        err |= __put_user(stat->blocks, &ubuf->st_blocks);
-        return err;
 }
 asmlinkage long compat_sys_newstat(const char __user * filename,
diff --git a/fs/dcache.c b/fs/dcache.c
index 16a53cc2cc02..fe19ac13f75f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2968,7 +2968,7 @@ __setup("dhash_entries=", set_dhash_entries);
 static void __init dcache_init_early(void)
 {
-        int loop;
+        unsigned int loop;
        /* If hashes are distributed across NUMA nodes, defer
         * hash allocation until vmalloc space is available.
@@ -2986,13 +2986,13 @@ static void __init dcache_init_early(void)
                                        &d_hash_mask,
                                        0);
-        for (loop = 0; loop < (1 << d_hash_shift); loop++)
+        for (loop = 0; loop < (1U << d_hash_shift); loop++)
                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
 }
 static void __init dcache_init(void)
 {
-        int loop;
+        unsigned int loop;
        /* 
         * A constructor could be added for stable state like the lists,
@@ -3016,7 +3016,7 @@ static void __init dcache_init(void)
                                        &d_hash_mask,
                                        0);
-        for (loop = 0; loop < (1 << d_hash_shift); loop++)
+        for (loop = 0; loop < (1U << d_hash_shift); loop++)
                INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
 }
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4a588dbd11bf..f4aadd15b613 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -173,7 +173,7 @@ void inode_dio_wait(struct inode *inode)
        if (atomic_read(&inode->i_dio_count))
                __inode_dio_wait(inode);
 }
-EXPORT_SYMBOL_GPL(inode_dio_wait);
+EXPORT_SYMBOL(inode_dio_wait);
 /*
 * inode_dio_done - signal finish of a direct I/O requests
@@ -187,7 +187,7 @@ void inode_dio_done(struct inode *inode)
        if (atomic_dec_and_test(&inode->i_dio_count))
                wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
 }
-EXPORT_SYMBOL_GPL(inode_dio_done);
+EXPORT_SYMBOL(inode_dio_done);
 /*
 * How many pages are in the queue?
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 63ab24510649..ea9931281557 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1990,6 +1990,17 @@ out:
        return;
 }
+static size_t ecryptfs_max_decoded_size(size_t encoded_size)
+{
+        /* Not exact; conservatively long. Every block of 4
+         * encoded characters decodes into a block of 3
+         * decoded characters. This segment of code provides
+         * the caller with the maximum amount of allocated
+         * space that @dst will need to point to in a
+         * subsequent call. */
+        return ((encoded_size + 1) * 3) / 4;
+}
 /**
 * ecryptfs_decode_from_filename
 * @dst: If NULL, this function only sets @dst_size and returns. If
@@ -2008,13 +2019,7 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
        size_t dst_byte_offset = 0;
        if (dst == NULL) {
-                /* Not exact; conservatively long. Every block of 4
+                (*dst_size) = ecryptfs_max_decoded_size(src_size);
-                 * encoded characters decodes into a block of 3
-                 * decoded characters. This segment of code provides
-                 * the caller with the maximum amount of allocated
-                 * space that @dst will need to point to in a
-                 * subsequent call. */
-                (*dst_size) = (((src_size + 1) * 3) / 4);
                goto out;
        }
        while (src_byte_offset < src_size) {
@@ -2239,3 +2244,52 @@ out_free:
 out:
        return rc;
 }
+#define ENC_NAME_MAX_BLOCKLEN_8_OR_16   143
+int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
+                           struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
+{
+        struct blkcipher_desc desc;
+        struct mutex *tfm_mutex;
+        size_t cipher_blocksize;
+        int rc;
+        if (!(mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) {
+                (*namelen) = lower_namelen;
+                return 0;
+        }
+        rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
+                        mount_crypt_stat->global_default_fn_cipher_name);
+        if (unlikely(rc)) {
+                (*namelen) = 0;
+                return rc;
+        }
+        mutex_lock(tfm_mutex);
+        cipher_blocksize = crypto_blkcipher_blocksize(desc.tfm);
+        mutex_unlock(tfm_mutex);
+        /* Return an exact amount for the common cases */
+        if (lower_namelen == NAME_MAX
+            && (cipher_blocksize == 8 || cipher_blocksize == 16)) {
+                (*namelen) = ENC_NAME_MAX_BLOCKLEN_8_OR_16;
+                return 0;
+        }
+        /* Return a safe estimate for the uncommon cases */
+        (*namelen) = lower_namelen;
+        (*namelen) -= ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE;
+        /* Since this is the max decoded size, subtract 1 "decoded block" len */
+        (*namelen) = ecryptfs_max_decoded_size(*namelen) - 3;
+        (*namelen) -= ECRYPTFS_TAG_70_MAX_METADATA_SIZE;
+        (*namelen) -= ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES;
+        /* Worst case is that the filename is padded nearly a full block size */
+        (*namelen) -= cipher_blocksize - 1;
+        if ((*namelen) < 0)
+                (*namelen) = 0;
+        return 0;
+}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index a2362df58ae8..867b64c5d84f 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -162,6 +162,10 @@ ecryptfs_get_key_payload_data(struct key *key)
 #define ECRYPTFS_NON_NULL 0x42 /* A reasonable substitute for NULL */
 #define MD5_DIGEST_SIZE 16
 #define ECRYPTFS_TAG_70_DIGEST_SIZE MD5_DIGEST_SIZE
+#define ECRYPTFS_TAG_70_MIN_METADATA_SIZE (1 + ECRYPTFS_MIN_PKT_LEN_SIZE \
+                                           + ECRYPTFS_SIG_SIZE + 1 + 1)
+#define ECRYPTFS_TAG_70_MAX_METADATA_SIZE (1 + ECRYPTFS_MAX_PKT_LEN_SIZE \
+                                           + ECRYPTFS_SIG_SIZE + 1 + 1)
 #define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FEK_ENCRYPTED."
 #define ECRYPTFS_FEK_ENCRYPTED_FILENAME_PREFIX_SIZE 23
 #define ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX "ECRYPTFS_FNEK_ENCRYPTED."
@@ -701,6 +705,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
                             size_t *packet_size,
                             struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
                             char *data, size_t max_packet_size);
+int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
+                           struct ecryptfs_mount_crypt_stat *mount_crypt_stat);
 int ecryptfs_derive_iv(char *iv, struct ecryptfs_crypt_stat *crypt_stat,
                       loff_t offset);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 19892d7d2ed1..ab35b113003b 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1085,6 +1085,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        }
        rc = vfs_setxattr(lower_dentry, name, value, size, flags);
+        if (!rc)
+                fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode);
 out:
        return rc;
 }
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 8e3b943e330f..2333203a120b 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -679,10 +679,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
         * Octets N3-N4: Block-aligned encrypted filename
         *  - Consists of a minimum number of random characters, a \0
         *    separator, and then the filename */
-        s->max_packet_size = (1                   /* Tag 70 identifier */
+        s->max_packet_size = (ECRYPTFS_TAG_70_MAX_METADATA_SIZE
-                              + 3                 /* Max Tag 70 packet size */
-                              + ECRYPTFS_SIG_SIZE /* FNEK sig */
-                              + 1                 /* Cipher identifier */
                              + s->block_aligned_filename_size);
        if (dest == NULL) {
                (*packet_size) = s->max_packet_size;
@@ -934,10 +931,10 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
                goto out;
        }
        s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
-        if (max_packet_size < (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1)) {
+        if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) {
                printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be "
                       "at least [%d]\n", __func__, max_packet_size,
-                        (1 + 1 + ECRYPTFS_SIG_SIZE + 1 + 1));
+                       ECRYPTFS_TAG_70_MIN_METADATA_SIZE);
                rc = -EINVAL;
                goto out;
        }
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 10ec695ccd68..a46b3a8fee1e 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -150,7 +150,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
                        /* This is a header extent */
                        char *page_virt;
-                        page_virt = kmap_atomic(page, KM_USER0);
+                        page_virt = kmap_atomic(page);
                        memset(page_virt, 0, PAGE_CACHE_SIZE);
                        /* TODO: Support more than one header extent */
                        if (view_extent_num == 0) {
@@ -163,7 +163,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
                                                               crypt_stat,
                                                               &written);
                        }
-                        kunmap_atomic(page_virt, KM_USER0);
+                        kunmap_atomic(page_virt);
                        flush_dcache_page(page);
                        if (rc) {
                                printk(KERN_ERR "%s: Error reading xattr "
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 5c0106f75775..b2a34a192f4f 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -156,7 +156,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
                               ecryptfs_page_idx, rc);
                        goto out;
                }
-                ecryptfs_page_virt = kmap_atomic(ecryptfs_page, KM_USER0);
+                ecryptfs_page_virt = kmap_atomic(ecryptfs_page);
                /*
                 * pos: where we're now writing, offset: where the request was
@@ -179,7 +179,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
                               (data + data_offset), num_bytes);
                        data_offset += num_bytes;
                }
-                kunmap_atomic(ecryptfs_page_virt, KM_USER0);
+                kunmap_atomic(ecryptfs_page_virt);
                flush_dcache_page(ecryptfs_page);
                SetPageUptodate(ecryptfs_page);
                unlock_page(ecryptfs_page);
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 9df7fd6e0c39..cf152823bbf4 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -30,6 +30,8 @@
 #include <linux/seq_file.h>
 #include <linux/file.h>
 #include <linux/crypto.h>
+#include <linux/statfs.h>
+#include <linux/magic.h>
 #include "ecryptfs_kernel.h"
 struct kmem_cache *ecryptfs_inode_info_cache;
@@ -102,10 +104,20 @@ static void ecryptfs_destroy_inode(struct inode *inode)
 static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+        int rc;
        if (!lower_dentry->d_sb->s_op->statfs)
                return -ENOSYS;
-        return lower_dentry->d_sb->s_op->statfs(lower_dentry, buf);
+        rc = lower_dentry->d_sb->s_op->statfs(lower_dentry, buf);
+        if (rc)
+                return rc;
+        buf->f_type = ECRYPTFS_SUPER_MAGIC;
+        rc = ecryptfs_set_f_namelen(&buf->f_namelen, buf->f_namelen,
+               &ecryptfs_superblock_to_private(dentry->d_sb)->mount_crypt_stat);
+        return rc;
 }
 /**
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index aabdfc38cf24..ea54cdef04dd 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -320,6 +320,11 @@ static inline int ep_is_linked(struct list_head *p)
        return !list_empty(p);
 }
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+{
+        return container_of(p, struct eppoll_entry, wait);
+}
 /* Get the "struct epitem" from a wait queue pointer */
 static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
 {
@@ -467,6 +472,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
        put_cpu();
 }
+static void ep_remove_wait_queue(struct eppoll_entry *pwq)
+{
+        wait_queue_head_t *whead;
+        rcu_read_lock();
+        /* If it is cleared by POLLFREE, it should be rcu-safe */
+        whead = rcu_dereference(pwq->whead);
+        if (whead)
+                remove_wait_queue(whead, &pwq->wait);
+        rcu_read_unlock();
+}
 /*
 * This function unregisters poll callbacks from the associated file
 * descriptor.  Must be called with "mtx" held (or "epmutex" if called from
@@ -481,7 +498,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
                pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
                list_del(&pwq->llink);
-                remove_wait_queue(pwq->whead, &pwq->wait);
+                ep_remove_wait_queue(pwq);
                kmem_cache_free(pwq_cache, pwq);
        }
 }
@@ -842,6 +859,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
        struct epitem *epi = ep_item_from_wait(wait);
        struct eventpoll *ep = epi->ep;
+        if ((unsigned long)key & POLLFREE) {
+                ep_pwq_from_wait(wait)->whead = NULL;
+                /*
+                 * whead = NULL above can race with ep_remove_wait_queue()
+                 * which can do another remove_wait_queue() after us, so we
+                 * can't use __remove_wait_queue(). whead->lock is held by
+                 * the caller.
+                 */
+                list_del_init(&wait->task_list);
+        }
        spin_lock_irqsave(&ep->lock, flags);
        /*
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f855916657ba..5b4a9362d5aa 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -53,14 +53,6 @@ struct wb_writeback_work {
 };
 /*
- * Include the creation of the trace points after defining the
- * wb_writeback_work structure so that the definition remains local to this
- * file.
- */
-#define CREATE_TRACE_POINTS
-#include <trace/events/writeback.h>
-/*
 * We don't actually have pdflush, but this one is exported though /proc...
 */
 int nr_pdflush_threads;
@@ -92,6 +84,14 @@ static inline struct inode *wb_inode(struct list_head *head)
        return list_entry(head, struct inode, i_wb_list);
 }
+/*
+ * Include the creation of the trace points after defining the
+ * wb_writeback_work structure and inline functions so that the definition
+ * remains local to this file.
+ */
+#define CREATE_TRACE_POINTS
+#include <trace/events/writeback.h>
 /* Wakeup flusher thread or forker thread to fork it. Requires bdi->wb_lock. */
 static void bdi_wakeup_flusher(struct backing_dev_info *bdi)
 {
diff --git a/fs/inode.c b/fs/inode.c
index fb10d86ffad7..d3ebdbe723d0 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1651,7 +1651,7 @@ __setup("ihash_entries=", set_ihash_entries);
 */
 void __init inode_init_early(void)
 {
-        int loop;
+        unsigned int loop;
        /* If hashes are distributed across NUMA nodes, defer
         * hash allocation until vmalloc space is available.
@@ -1669,13 +1669,13 @@ void __init inode_init_early(void)
                                        &i_hash_mask,
                                        0);
-        for (loop = 0; loop < (1 << i_hash_shift); loop++)
+        for (loop = 0; loop < (1U << i_hash_shift); loop++)
                INIT_HLIST_HEAD(&inode_hashtable[loop]);
 }
 void __init inode_init(void)
 {
-        int loop;
+        unsigned int loop;
        /* inode slab cache */
        inode_cachep = kmem_cache_create("inode_cache",
@@ -1699,7 +1699,7 @@ void __init inode_init(void)
                                        &i_hash_mask,
                                        0);
-        for (loop = 0; loop < (1 << i_hash_shift); loop++)
+        for (loop = 0; loop < (1U << i_hash_shift); loop++)
                INIT_HLIST_HEAD(&inode_hashtable[loop]);
 }
diff --git a/fs/ioprio.c b/fs/ioprio.c
index f84b380d65e5..0f1b9515213b 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -51,7 +51,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio)
        ioc = get_task_io_context(task, GFP_ATOMIC, NUMA_NO_NODE);
        if (ioc) {
                ioc_ioprio_changed(ioc, ioprio);
-                put_io_context(ioc, NULL);
+                put_io_context(ioc);
        }
        return err;
diff --git a/fs/namei.c b/fs/namei.c
index 208c6aa4a989..a780ea515c47 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1095,8 +1095,10 @@ static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentr
        struct dentry *old;
        /* Don't create child dentry for a dead directory. */
-        if (unlikely(IS_DEADDIR(inode)))
+        if (unlikely(IS_DEADDIR(inode))) {
+                dput(dentry);
                return ERR_PTR(-ENOENT);
+        }
        old = inode->i_op->lookup(inode, dentry, nd);
        if (unlikely(old)) {
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f0c849c98fe4..ec9f6ef6c5dd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3575,8 +3575,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
        }
        if (npages > 1) {
                /* for decoding across pages */
-                args.acl_scratch = alloc_page(GFP_KERNEL);
+                res.acl_scratch = alloc_page(GFP_KERNEL);
-                if (!args.acl_scratch)
+                if (!res.acl_scratch)
                        goto out_free;
        }
        args.acl_len = npages * PAGE_SIZE;
@@ -3612,8 +3612,8 @@ out_free:
        for (i = 0; i < npages; i++)
                if (pages[i])
                        __free_page(pages[i]);
-        if (args.acl_scratch)
+        if (res.acl_scratch)
-                __free_page(args.acl_scratch);
+                __free_page(res.acl_scratch);
        return ret;
 }
@@ -4883,8 +4883,10 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
                                clp->cl_rpcclient->cl_auth->au_flavor);
        res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
-        if (unlikely(!res.server_scope))
+        if (unlikely(!res.server_scope)) {
-                return -ENOMEM;
+                status = -ENOMEM;
+                goto out;
+        }
        status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
        if (!status)
@@ -4901,12 +4903,13 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
                        clp->server_scope = NULL;
                }
-                if (!clp->server_scope)
+                if (!clp->server_scope) {
                        clp->server_scope = res.server_scope;
-                else
+                        goto out;
-                        kfree(res.server_scope);
+                }
        }
+        kfree(res.server_scope);
+out:
        dprintk("<-- %s status= %d\n", __func__, status);
        return status;
 }
@@ -5008,37 +5011,53 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
        return status;
 }
+/*
+ * Allocate an array of @max_slots nfs4_slot entries with kcalloc(), using
+ * the caller-supplied @gfp_flags.
+ *
+ * Returns whatever kcalloc() returns; nfs4_realloc_slot_table() treats a
+ * NULL result as -ENOMEM.
+ */
+static struct nfs4_slot *nfs4_alloc_slots(u32 max_slots, gfp_t gfp_flags)
+{
+        return kcalloc(max_slots, sizeof(struct nfs4_slot), gfp_flags);
+}
+/*
+ * Optionally install a new slot array in @tbl and (re)initialise the table.
+ *
+ * @tbl:       slot table to update
+ * @new:       replacement slot array, or NULL to keep the current one
+ * @max_slots: number of entries in @new; only used when @new is non-NULL
+ * @ivalue:    value written to seq_nr of every slot in the table
+ *
+ * All updates to @tbl are made with tbl->slot_tbl_lock held. The array
+ * that was replaced (if any) is freed after the lock has been dropped.
+ */
+static void nfs4_add_and_init_slots(struct nfs4_slot_table *tbl,
+                struct nfs4_slot *new,
+                u32 max_slots,
+                u32 ivalue)
+{
+        struct nfs4_slot *old = NULL;
+        u32 i;
+        spin_lock(&tbl->slot_tbl_lock);
+        /*
+         * A non-NULL @new replaces the current array; remember the old
+         * pointer so that it can be released once the lock is dropped.
+         */
+        if (new) {
+                old = tbl->slots;
+                tbl->slots = new;
+                tbl->max_slots = max_slots;
+        }
+        tbl->highest_used_slotid = -1;  /* no slot is currently used */
+        /* Runs over tbl->max_slots, i.e. the new size if @new was installed */
+        for (i = 0; i < tbl->max_slots; i++)
+                tbl->slots[i].seq_nr = ivalue;
+        spin_unlock(&tbl->slot_tbl_lock);
+        /* old is still NULL when the table was not replaced */
+        kfree(old);
+}
 /*
- * Reset a slot table
+ * (re)Initialise a slot table
 */
-static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
+static int nfs4_realloc_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
-                                 int ivalue)
+                                 u32 ivalue)
 {
        struct nfs4_slot *new = NULL;
-        int i;
+        int ret = -ENOMEM;
-        int ret = 0;
        dprintk("--> %s: max_reqs=%u, tbl->max_slots %d\n", __func__,
                max_reqs, tbl->max_slots);
        /* Does the newly negotiated max_reqs match the existing slot table? */
        if (max_reqs != tbl->max_slots) {
-                ret = -ENOMEM;
+                new = nfs4_alloc_slots(max_reqs, GFP_NOFS);
-                new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
-                              GFP_NOFS);
                if (!new)
                        goto out;
-                ret = 0;
-                kfree(tbl->slots);
        }
-        spin_lock(&tbl->slot_tbl_lock);
+        ret = 0;
-        if (new) {
-                tbl->slots = new;
+        nfs4_add_and_init_slots(tbl, new, max_reqs, ivalue);
-                tbl->max_slots = max_reqs;
-        }
-        for (i = 0; i < tbl->max_slots; ++i)
-                tbl->slots[i].seq_nr = ivalue;
-        spin_unlock(&tbl->slot_tbl_lock);
        dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
                tbl, tbl->slots, tbl->max_slots);
 out:
@@ -5061,36 +5080,6 @@ static void nfs4_destroy_slot_tables(struct nfs4_session *session)
 }
 /*
- * Initialize slot table
- */
-static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
-                int max_slots, int ivalue)
-{
-        struct nfs4_slot *slot;
-        int ret = -ENOMEM;
-        BUG_ON(max_slots > NFS4_MAX_SLOT_TABLE);
-        dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
-        slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
-        if (!slot)
-                goto out;
-        ret = 0;
-        spin_lock(&tbl->slot_tbl_lock);
-        tbl->max_slots = max_slots;
-        tbl->slots = slot;
-        tbl->highest_used_slotid = -1;  /* no slot is currently used */
-        spin_unlock(&tbl->slot_tbl_lock);
-        dprintk("%s: tbl=%p slots=%p max_slots=%d\n", __func__,
-                tbl, tbl->slots, tbl->max_slots);
-out:
-        dprintk("<-- %s: return %d\n", __func__, ret);
-        return ret;
-}
-/*
 * Initialize or reset the forechannel and backchannel tables
 */
 static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
@@ -5101,25 +5090,16 @@ static int nfs4_setup_session_slot_tables(struct nfs4_session *ses)
        dprintk("--> %s\n", __func__);
        /* Fore channel */
        tbl = &ses->fc_slot_table;
-        if (tbl->slots == NULL) {
+        status = nfs4_realloc_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
-                status = nfs4_init_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
+        if (status) /* -ENOMEM */
-                if (status) /* -ENOMEM */
+                return status;
-                        return status;
-        } else {
-                status = nfs4_reset_slot_table(tbl, ses->fc_attrs.max_reqs, 1);
-                if (status)
-                        return status;
-        }
        /* Back channel */
        tbl = &ses->bc_slot_table;
-        if (tbl->slots == NULL) {
+        status = nfs4_realloc_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
-                status = nfs4_init_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
+        if (status && tbl->slots == NULL)
-                if (status)
+                /* Fore and back channel share a connection so get
-                        /* Fore and back channel share a connection so get
+                 * both slot tables or neither */
-                         * both slot tables or neither */
+                nfs4_destroy_slot_tables(ses);
-                        nfs4_destroy_slot_tables(ses);
-        } else
-                status = nfs4_reset_slot_table(tbl, ses->bc_attrs.max_reqs, 0);
        return status;
 }
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a53f33b4ac3a..45392032e7bd 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1132,6 +1132,8 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
 {
        struct nfs_client *clp = server->nfs_client;
+        if (test_and_clear_bit(NFS_DELEGATED_STATE, &state->flags))
+                nfs_async_inode_return_delegation(state->inode, &state->stateid);
        nfs4_state_mark_reclaim_nograce(clp, state);
        nfs4_schedule_state_manager(clp);
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 95e92e438407..33bd8d0f745d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2522,7 +2522,6 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
        xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
                args->acl_pages, args->acl_pgbase, args->acl_len);
-        xdr_set_scratch_buffer(xdr, page_address(args->acl_scratch), PAGE_SIZE);
        encode_nops(&hdr);
 }
@@ -6032,6 +6031,10 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
        struct compound_hdr hdr;
        int status;
+        if (res->acl_scratch != NULL) {
+                void *p = page_address(res->acl_scratch);
+                xdr_set_scratch_buffer(xdr, p, PAGE_SIZE);
+        }
        status = decode_compound_hdr(xdr, &hdr);
        if (status)
                goto out;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index be244692550d..a9856e3eaaf0 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1053,7 +1053,7 @@ static int ocfs2_rename(struct inode *old_dir,
        handle_t *handle = NULL;
        struct buffer_head *old_dir_bh = NULL;
        struct buffer_head *new_dir_bh = NULL;
-        nlink_t old_dir_nlink = old_dir->i_nlink;
+        u32 old_dir_nlink = old_dir->i_nlink;
        struct ocfs2_dinode *old_di;
        struct ocfs2_dir_lookup_result old_inode_dot_dot_res = { NULL, };
        struct ocfs2_dir_lookup_result target_lookup_res = { NULL, };
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 7898cd688a00..fc2c4388d126 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -292,11 +292,26 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
        }
 }
+/*
+ * Return 1 if 'cmd' will block on frozen filesystem
+ *
+ * The six commands listed in the switch below return 0; every other value
+ * of 'cmd' returns 1. quotactl_block() uses the result to choose between
+ * get_super_thawed() (for 1) and get_super() (for 0).
+ */
+static int quotactl_cmd_write(int cmd)
+{
+        switch (cmd) {
+        /* Commands reported as not blocking on a frozen filesystem */
+        case Q_GETFMT:
+        case Q_GETINFO:
+        case Q_SYNC:
+        case Q_XGETQSTAT:
+        case Q_XGETQUOTA:
+        case Q_XQUOTASYNC:
+                return 0;
+        }
+        /* Anything not listed above is reported as blocking */
+        return 1;
+}
 /*
 * look up a superblock on which quota ops will be performed
 * - use the name of a block device to find the superblock thereon
 */
-static struct super_block *quotactl_block(const char __user *special)
+static struct super_block *quotactl_block(const char __user *special, int cmd)
 {
 #ifdef CONFIG_BLOCK
        struct block_device *bdev;
@@ -309,7 +324,10 @@ static struct super_block *quotactl_block(const char __user *special)
        putname(tmp);
        if (IS_ERR(bdev))
                return ERR_CAST(bdev);
-        sb = get_super(bdev);
+        if (quotactl_cmd_write(cmd))
+                sb = get_super_thawed(bdev);
+        else
+                sb = get_super(bdev);
        bdput(bdev);
        if (!sb)
                return ERR_PTR(-ENODEV);
@@ -361,7 +379,7 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
                        pathp = &path;
        }
-        sb = quotactl_block(special);
+        sb = quotactl_block(special, cmds);
        if (IS_ERR(sb)) {
                ret = PTR_ERR(sb);
                goto out;
diff --git a/fs/select.c b/fs/select.c
index d33418fdc858..e782258d0de3 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -912,7 +912,7 @@ static long do_restart_poll(struct restart_block *restart_block)
 }
 SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
-                long, timeout_msecs)
+                int, timeout_msecs)
 {
        struct timespec end_time, *to = NULL;
        int ret;
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 492465b451dd..7ae2a574cb25 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -30,6 +30,21 @@
 #include <linux/signalfd.h>
 #include <linux/syscalls.h>
+/*
+ * signalfd_cleanup - notify anything still queued on @sighand's signalfd
+ * wait queue.
+ *
+ * Does nothing when sighand->signalfd_wqh has no waiters. Otherwise the
+ * queue is woken with POLLHUP | POLLFREE, and each waiter's callback is
+ * expected to take itself off the queue in response to POLLFREE (the
+ * POLLFREE branch in ep_poll_callback() in fs/eventpoll.c is one such
+ * callback).
+ *
+ * NOTE(review): presumably called when @sighand is about to be freed; the
+ * call site is not visible from here -- confirm.
+ */
+void signalfd_cleanup(struct sighand_struct *sighand)
+{
+        wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+        /*
+         * The lockless check can race with remove_wait_queue() in progress,
+         * but in this case its caller should run under rcu_read_lock() and
+         * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
+         */
+        if (likely(!waitqueue_active(wqh)))
+                return;
+        /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+        wake_up_poll(wqh, POLLHUP | POLLFREE);
+}
 struct signalfd_ctx {
        sigset_t sigmask;
 };
diff --git a/fs/super.c b/fs/super.c
index 6015c02296b7..6277ec6cb60a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -634,6 +634,28 @@ rescan:
 EXPORT_SYMBOL(get_super);
 /**
+ *      get_super_thawed - get thawed superblock of a device
+ *      @bdev: device to get the superblock for
+ *
+ *      Scans the superblock list and finds the superblock of the file system
+ *      mounted on the device. The superblock is returned once it is thawed
+ *      (or immediately if it was not frozen). %NULL is returned if no match
+ *      is found.
+ *
+ *      A non-NULL result is handed back exactly as get_super() returned it.
+ *      For a frozen superblock, s_umount is released before waiting, and
+ *      the lookup is repeated from scratch after the wait.
+ */
+struct super_block *get_super_thawed(struct block_device *bdev)
+{
+        while (1) {
+                struct super_block *s = get_super(bdev);
+                /* Nothing found for @bdev, or found and not frozen: done */
+                if (!s || s->s_frozen == SB_UNFROZEN)
+                        return s;
+                /*
+                 * Frozen: release s_umount (read side) first, then call
+                 * vfs_check_frozen() -- presumably this is where the wait
+                 * for the thaw happens. Afterwards put_super() is called
+                 * on this superblock and the loop redoes the get_super()
+                 * lookup and the s_frozen check.
+                 */
+                up_read(&s->s_umount);
+                vfs_check_frozen(s, SB_FREEZE_WRITE);
+                put_super(s);
+        }
+}
+EXPORT_SYMBOL(get_super_thawed);
+/**
 * get_active_super - get an active reference to the superblock of a device
 * @bdev: device to get the superblock for
 *
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 292eff198030..ab7c53fe346e 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -110,10 +110,4 @@ kmem_zone_destroy(kmem_zone_t *zone)
 extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
 extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
-static inline int
-kmem_shake_allow(gfp_t gfp_mask)
-{
-        return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
-}
 #endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index b4ff40b5f918..53db20ee3e77 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -63,82 +63,6 @@ int xfs_dqerror_mod = 33;
 static struct lock_class_key xfs_dquot_other_class;
 /*
- * Allocate and initialize a dquot. We don't always allocate fresh memory;
- * we try to reclaim a free dquot if the number of incore dquots are above
- * a threshold.
- * The only field inside the core that gets initialized at this point
- * is the d_id field. The idea is to fill in the entire q_core
- * when we read in the on disk dquot.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqinit(
-        xfs_mount_t  *mp,
-        xfs_dqid_t   id,
-        uint         type)
-{
-        xfs_dquot_t     *dqp;
-        boolean_t       brandnewdquot;
-        brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
-        dqp->dq_flags = type;
-        dqp->q_core.d_id = cpu_to_be32(id);
-        dqp->q_mount = mp;
-        /*
-         * No need to re-initialize these if this is a reclaimed dquot.
-         */
-        if (brandnewdquot) {
-                INIT_LIST_HEAD(&dqp->q_freelist);
-                mutex_init(&dqp->q_qlock);
-                init_waitqueue_head(&dqp->q_pinwait);
-                /*
-                 * Because we want to use a counting completion, complete
-                 * the flush completion once to allow a single access to
-                 * the flush completion without blocking.
-                 */
-                init_completion(&dqp->q_flush);
-                complete(&dqp->q_flush);
-                trace_xfs_dqinit(dqp);
-        } else {
-                /*
-                 * Only the q_core portion was zeroed in dqreclaim_one().
-                 * So, we need to reset others.
-                 */
-                dqp->q_nrefs = 0;
-                dqp->q_blkno = 0;
-                INIT_LIST_HEAD(&dqp->q_mplist);
-                INIT_LIST_HEAD(&dqp->q_hashlist);
-                dqp->q_bufoffset = 0;
-                dqp->q_fileoffset = 0;
-                dqp->q_transp = NULL;
-                dqp->q_gdquot = NULL;
-                dqp->q_res_bcount = 0;
-                dqp->q_res_icount = 0;
-                dqp->q_res_rtbcount = 0;
-                atomic_set(&dqp->q_pincount, 0);
-                dqp->q_hash = NULL;
-                ASSERT(list_empty(&dqp->q_freelist));
-                trace_xfs_dqreuse(dqp);
-        }
-        /*
-         * In either case we need to make sure group quotas have a different
-         * lock class than user quotas, to make sure lockdep knows we can
-         * locks of one of each at the same time.
-         */
-        if (!(type & XFS_DQ_USER))
-                lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
-        /*
-         * log item gets initialized later
-         */
-        return (dqp);
-}
-/*
 * This is called to free all the memory associated with a dquot
 */
 void
@@ -215,10 +139,10 @@ xfs_qm_adjust_dqtimers(
        if (!d->d_btimer) {
                if ((d->d_blk_softlimit &&
-                     (be64_to_cpu(d->d_bcount) >=
+                     (be64_to_cpu(d->d_bcount) >
                      be64_to_cpu(d->d_blk_softlimit))) ||
                    (d->d_blk_hardlimit &&
-                     (be64_to_cpu(d->d_bcount) >=
+                     (be64_to_cpu(d->d_bcount) >
                      be64_to_cpu(d->d_blk_hardlimit)))) {
                        d->d_btimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_btimelimit);
@@ -227,10 +151,10 @@ xfs_qm_adjust_dqtimers(
                }
        } else {
                if ((!d->d_blk_softlimit ||
-                     (be64_to_cpu(d->d_bcount) <
+                     (be64_to_cpu(d->d_bcount) <=
                      be64_to_cpu(d->d_blk_softlimit))) &&
                    (!d->d_blk_hardlimit ||
-                    (be64_to_cpu(d->d_bcount) <
+                    (be64_to_cpu(d->d_bcount) <=
                     be64_to_cpu(d->d_blk_hardlimit)))) {
                        d->d_btimer = 0;
                }
@@ -238,10 +162,10 @@ xfs_qm_adjust_dqtimers(
        if (!d->d_itimer) {
                if ((d->d_ino_softlimit &&
-                     (be64_to_cpu(d->d_icount) >=
+                     (be64_to_cpu(d->d_icount) >
                      be64_to_cpu(d->d_ino_softlimit))) ||
                    (d->d_ino_hardlimit &&
-                     (be64_to_cpu(d->d_icount) >=
+                     (be64_to_cpu(d->d_icount) >
                      be64_to_cpu(d->d_ino_hardlimit)))) {
                        d->d_itimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_itimelimit);
@@ -250,10 +174,10 @@ xfs_qm_adjust_dqtimers(
                }
        } else {
                if ((!d->d_ino_softlimit ||
-                     (be64_to_cpu(d->d_icount) <
+                     (be64_to_cpu(d->d_icount) <=
                      be64_to_cpu(d->d_ino_softlimit)))  &&
                    (!d->d_ino_hardlimit ||
-                     (be64_to_cpu(d->d_icount) <
+                     (be64_to_cpu(d->d_icount) <=
                      be64_to_cpu(d->d_ino_hardlimit)))) {
                        d->d_itimer = 0;
                }
@@ -261,10 +185,10 @@ xfs_qm_adjust_dqtimers(
        if (!d->d_rtbtimer) {
                if ((d->d_rtb_softlimit &&
-                     (be64_to_cpu(d->d_rtbcount) >=
+                     (be64_to_cpu(d->d_rtbcount) >
                      be64_to_cpu(d->d_rtb_softlimit))) ||
                    (d->d_rtb_hardlimit &&
-                     (be64_to_cpu(d->d_rtbcount) >=
+                     (be64_to_cpu(d->d_rtbcount) >
                      be64_to_cpu(d->d_rtb_hardlimit)))) {
                        d->d_rtbtimer = cpu_to_be32(get_seconds() +
                                        mp->m_quotainfo->qi_rtbtimelimit);
@@ -273,10 +197,10 @@ xfs_qm_adjust_dqtimers(
                }
        } else {
                if ((!d->d_rtb_softlimit ||
-                     (be64_to_cpu(d->d_rtbcount) <
+                     (be64_to_cpu(d->d_rtbcount) <=
                      be64_to_cpu(d->d_rtb_softlimit))) &&
                    (!d->d_rtb_hardlimit ||
-                     (be64_to_cpu(d->d_rtbcount) <
+                     (be64_to_cpu(d->d_rtbcount) <=
                      be64_to_cpu(d->d_rtb_hardlimit)))) {
                        d->d_rtbtimer = 0;
                }
@@ -567,7 +491,32 @@ xfs_qm_dqread(
        int                     error;
        int                     cancelflags = 0;
-        dqp = xfs_qm_dqinit(mp, id, type);
+        dqp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+        dqp->dq_flags = type;
+        dqp->q_core.d_id = cpu_to_be32(id);
+        dqp->q_mount = mp;
+        INIT_LIST_HEAD(&dqp->q_freelist);
+        mutex_init(&dqp->q_qlock);
+        init_waitqueue_head(&dqp->q_pinwait);
+        /*
+         * Because we want to use a counting completion, complete
+         * the flush completion once to allow a single access to
+         * the flush completion without blocking.
+         */
+        init_completion(&dqp->q_flush);
+        complete(&dqp->q_flush);
+        /*
+         * Make sure group quotas have a different lock class than user
+         * quotas.
+         */
+        if (!(type & XFS_DQ_USER))
+                lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
+        atomic_inc(&xfs_Gqm->qm_totaldquots);
        trace_xfs_dqread(dqp);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 541a508adea1..0ed9ee77937c 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1489,7 +1489,7 @@ xlog_recover_add_to_cont_trans(
        old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
        old_len = item->ri_buf[item->ri_cnt-1].i_len;
-        ptr = kmem_realloc(old_ptr, len+old_len, old_len, 0u);
+        ptr = kmem_realloc(old_ptr, len+old_len, old_len, KM_SLEEP);
        memcpy(&ptr[old_len], dp, len); /* d, s, l */
        item->ri_buf[item->ri_cnt-1].i_len += len;
        item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -1981,7 +1981,7 @@ xfs_qm_dqcheck(
        if (!errs && ddq->d_id) {
                if (ddq->d_blk_softlimit &&
-                    be64_to_cpu(ddq->d_bcount) >=
+                    be64_to_cpu(ddq->d_bcount) >
                                be64_to_cpu(ddq->d_blk_softlimit)) {
                        if (!ddq->d_btimer) {
                                if (flags & XFS_QMOPT_DOWARN)
@@ -1992,7 +1992,7 @@ xfs_qm_dqcheck(
                        }
                }
                if (ddq->d_ino_softlimit &&
-                    be64_to_cpu(ddq->d_icount) >=
+                    be64_to_cpu(ddq->d_icount) >
                                be64_to_cpu(ddq->d_ino_softlimit)) {
                        if (!ddq->d_itimer) {
                                if (flags & XFS_QMOPT_DOWARN)
@@ -2003,7 +2003,7 @@ xfs_qm_dqcheck(
                        }
                }
                if (ddq->d_rtb_softlimit &&
-                    be64_to_cpu(ddq->d_rtbcount) >=
+                    be64_to_cpu(ddq->d_rtbcount) >
                                be64_to_cpu(ddq->d_rtb_softlimit)) {
                        if (!ddq->d_rtbtimer) {
                                if (flags & XFS_QMOPT_DOWARN)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 671f37eae1c7..c436def733bf 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -50,7 +50,6 @@
 */
 struct mutex    xfs_Gqm_lock;
 struct xfs_qm   *xfs_Gqm;
-uint            ndquot;
 kmem_zone_t     *qm_dqzone;
 kmem_zone_t     *qm_dqtrxzone;
@@ -93,7 +92,6 @@ xfs_Gqm_init(void)
                goto out_free_udqhash;
        hsize /= sizeof(xfs_dqhash_t);
-        ndquot = hsize << 8;
        xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
        xqm->qm_dqhashmask = hsize - 1;
@@ -137,7 +135,6 @@ xfs_Gqm_init(void)
                xqm->qm_dqtrxzone = qm_dqtrxzone;
        atomic_set(&xqm->qm_totaldquots, 0);
-        xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
        xqm->qm_nrefs = 0;
        return xqm;
@@ -1600,216 +1597,150 @@ xfs_qm_init_quotainos(
        return 0;
 }
+STATIC void
+xfs_qm_dqfree_one(
+        struct xfs_dquot        *dqp)
+{
+        struct xfs_mount        *mp = dqp->q_mount;
+        struct xfs_quotainfo    *qi = mp->m_quotainfo;
+        mutex_lock(&dqp->q_hash->qh_lock);
+        list_del_init(&dqp->q_hashlist);
+        dqp->q_hash->qh_version++;
+        mutex_unlock(&dqp->q_hash->qh_lock);
-/*
+        mutex_lock(&qi->qi_dqlist_lock);
- * Pop the least recently used dquot off the freelist and recycle it.
+        list_del_init(&dqp->q_mplist);
- */
+        qi->qi_dquots--;
-STATIC struct xfs_dquot *
+        qi->qi_dqreclaims++;
-xfs_qm_dqreclaim_one(void)
+        mutex_unlock(&qi->qi_dqlist_lock);
+        xfs_qm_dqdestroy(dqp);
+}
+STATIC void
+xfs_qm_dqreclaim_one(
+        struct xfs_dquot        *dqp,
+        struct list_head        *dispose_list)
 {
-        struct xfs_dquot        *dqp;
+        struct xfs_mount        *mp = dqp->q_mount;
-        int                     restarts = 0;
+        int                     error;
-        mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
+        if (!xfs_dqlock_nowait(dqp))
-restart:
+                goto out_busy;
-        list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
-                struct xfs_mount *mp = dqp->q_mount;
-                if (!xfs_dqlock_nowait(dqp))
+        /*
-                        continue;
+         * This dquot has acquired a reference in the meantime remove it from
+         * the freelist and try again.
+         */
+        if (dqp->q_nrefs) {
+                xfs_dqunlock(dqp);
-                /*
+                trace_xfs_dqreclaim_want(dqp);
-                 * This dquot has already been grabbed by dqlookup.
+                XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-                 * Remove it from the freelist and try again.
-                 */
-                if (dqp->q_nrefs) {
-                        trace_xfs_dqreclaim_want(dqp);
-                        XQM_STATS_INC(xqmstats.xs_qm_dqwants);
-                        list_del_init(&dqp->q_freelist);
-                        xfs_Gqm->qm_dqfrlist_cnt--;
-                        restarts++;
-                        goto dqunlock;
-                }
-                ASSERT(dqp->q_hash);
+                list_del_init(&dqp->q_freelist);
-                ASSERT(!list_empty(&dqp->q_mplist));
+                xfs_Gqm->qm_dqfrlist_cnt--;
+                return;
+        }
-                /*
+        ASSERT(dqp->q_hash);
-                 * Try to grab the flush lock. If this dquot is in the process
+        ASSERT(!list_empty(&dqp->q_mplist));
-                 * of getting flushed to disk, we don't want to reclaim it.
-                 */
-                if (!xfs_dqflock_nowait(dqp))
-                        goto dqunlock;
-                /*
+        /*
-                 * We have the flush lock so we know that this is not in the
+         * Try to grab the flush lock. If this dquot is in the process of
-                 * process of being flushed. So, if this is dirty, flush it
+         * getting flushed to disk, we don't want to reclaim it.
-                 * DELWRI so that we don't get a freelist infested with
+         */
-                 * dirty dquots.
+        if (!xfs_dqflock_nowait(dqp))
-                 */
+                goto out_busy;
-                if (XFS_DQ_IS_DIRTY(dqp)) {
-                        int     error;
-                        trace_xfs_dqreclaim_dirty(dqp);
+        /*
+         * We have the flush lock so we know that this is not in the
+         * process of being flushed. So, if this is dirty, flush it
+         * DELWRI so that we don't get a freelist infested with
+         * dirty dquots.
+         */
+        if (XFS_DQ_IS_DIRTY(dqp)) {
+                trace_xfs_dqreclaim_dirty(dqp);
-                        /*
+                /*
-                         * We flush it delayed write, so don't bother
+                 * We flush it delayed write, so don't bother releasing the
-                         * releasing the freelist lock.
+                 * freelist lock.
-                         */
+                 */
-                        error = xfs_qm_dqflush(dqp, SYNC_TRYLOCK);
+                error = xfs_qm_dqflush(dqp, 0);
-                        if (error) {
+                if (error) {
-                                xfs_warn(mp, "%s: dquot %p flush failed",
+                        xfs_warn(mp, "%s: dquot %p flush failed",
-                                        __func__, dqp);
+                                 __func__, dqp);
-                        }
-                        goto dqunlock;
                }
-                xfs_dqfunlock(dqp);
                /*
-                 * Prevent lookup now that we are going to reclaim the dquot.
+                 * Give the dquot another try on the freelist, as the
-                 * Once XFS_DQ_FREEING is set lookup won't touch the dquot,
+                 * flushing will take some time.
-                 * thus we can drop the lock now.
                 */
-                dqp->dq_flags |= XFS_DQ_FREEING;
+                goto out_busy;
-                xfs_dqunlock(dqp);
+        }
+        xfs_dqfunlock(dqp);
-                mutex_lock(&dqp->q_hash->qh_lock);
-                list_del_init(&dqp->q_hashlist);
-                dqp->q_hash->qh_version++;
-                mutex_unlock(&dqp->q_hash->qh_lock);
-                mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
-                list_del_init(&dqp->q_mplist);
-                mp->m_quotainfo->qi_dquots--;
-                mp->m_quotainfo->qi_dqreclaims++;
-                mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-                ASSERT(dqp->q_nrefs == 0);
+        /*
-                list_del_init(&dqp->q_freelist);
+         * Prevent lookups now that we are past the point of no return.
-                xfs_Gqm->qm_dqfrlist_cnt--;
+         */
+        dqp->dq_flags |= XFS_DQ_FREEING;
+        xfs_dqunlock(dqp);
-                mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+        ASSERT(dqp->q_nrefs == 0);
-                return dqp;
+        list_move_tail(&dqp->q_freelist, dispose_list);
-dqunlock:
+        xfs_Gqm->qm_dqfrlist_cnt--;
-                xfs_dqunlock(dqp);
-                if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
-                        break;
-                goto restart;
-        }
-        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
+        trace_xfs_dqreclaim_done(dqp);
-        return NULL;
+        XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
-}
+        return;
-/*
+out_busy:
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
+        xfs_dqunlock(dqp);
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function ...
- */
-STATIC int
-xfs_qm_shake_freelist(
-        int     howmany)
-{
-        int             nreclaimed = 0;
-        xfs_dquot_t     *dqp;
-        if (howmany <= 0)
+        /*
-                return 0;
+         * Move the dquot to the tail of the list so that we don't spin on it.
+         */
+        list_move_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
-        while (nreclaimed < howmany) {
+        trace_xfs_dqreclaim_busy(dqp);
-                dqp = xfs_qm_dqreclaim_one();
+        XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
-                if (!dqp)
-                        return nreclaimed;
-                xfs_qm_dqdestroy(dqp);
-                nreclaimed++;
-        }
-        return nreclaimed;
 }
-/*
- * The kmem_shake interface is invoked when memory is running low.
- */
-/* ARGSUSED */
 STATIC int
 xfs_qm_shake(
-        struct shrinker *shrink,
+        struct shrinker         *shrink,
-        struct shrink_control *sc)
+        struct shrink_control   *sc)
 {
-        int     ndqused, nfree, n;
+        int                     nr_to_scan = sc->nr_to_scan;
-        gfp_t gfp_mask = sc->gfp_mask;
+        LIST_HEAD               (dispose_list);
+        struct xfs_dquot        *dqp;
-        if (!kmem_shake_allow(gfp_mask))
-                return 0;
-        if (!xfs_Gqm)
-                return 0;
-        nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
-        /* incore dquots in all f/s's */
-        ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
-        ASSERT(ndqused >= 0);
-        if (nfree <= ndqused && nfree < ndquot)
+        if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
                return 0;
+        if (!nr_to_scan)
+                goto out;
-        ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
+        mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-        n = nfree - ndqused - ndquot;           /* # over target */
+        while (!list_empty(&xfs_Gqm->qm_dqfrlist)) {
+                if (nr_to_scan-- <= 0)
-        return xfs_qm_shake_freelist(MAX(nfree, n));
+                        break;
-}
+                dqp = list_first_entry(&xfs_Gqm->qm_dqfrlist, struct xfs_dquot,
+                                       q_freelist);
+                xfs_qm_dqreclaim_one(dqp, &dispose_list);
-/*------------------------------------------------------------------*/
-/*
- * Return a new incore dquot. Depending on the number of
- * dquots in the system, we either allocate a new one on the kernel heap,
- * or reclaim a free one.
- * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
- * to reclaim an existing one from the freelist.
- */
-boolean_t
-xfs_qm_dqalloc_incore(
-        xfs_dquot_t **O_dqpp)
-{
-        xfs_dquot_t     *dqp;
-        /*
-         * Check against high water mark to see if we want to pop
-         * a nincompoop dquot off the freelist.
-         */
-        if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
-                /*
-                 * Try to recycle a dquot from the freelist.
-                 */
-                if ((dqp = xfs_qm_dqreclaim_one())) {
-                        XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
-                        /*
-                         * Just zero the core here. The rest will get
-                         * reinitialized by caller. XXX we shouldn't even
-                         * do this zero ...
-                         */
-                        memset(&dqp->q_core, 0, sizeof(dqp->q_core));
-                        *O_dqpp = dqp;
-                        return B_FALSE;
-                }
-                XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
        }
+        mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-        /*
+        while (!list_empty(&dispose_list)) {
-         * Allocate a brand new dquot on the kernel heap and return it
+                dqp = list_first_entry(&dispose_list, struct xfs_dquot,
-         * to the caller to initialize.
+                                       q_freelist);
-         */
+                list_del_init(&dqp->q_freelist);
-        ASSERT(xfs_Gqm->qm_dqzone != NULL);
+                xfs_qm_dqfree_one(dqp);
-        *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
+        }
-        atomic_inc(&xfs_Gqm->qm_totaldquots);
+out:
+        return (xfs_Gqm->qm_dqfrlist_cnt / 100) * sysctl_vfs_cache_pressure;
-        return B_TRUE;
 }
 /*
 * Start a transaction and write the incore superblock changes to
 * disk. flags parameter indicates which fields have changed.
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 9b4f3adefbc5..9a9b997e1a0a 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -26,24 +26,12 @@
 struct xfs_qm;
 struct xfs_inode;
-extern uint             ndquot;
 extern struct mutex     xfs_Gqm_lock;
 extern struct xfs_qm    *xfs_Gqm;
 extern kmem_zone_t      *qm_dqzone;
 extern kmem_zone_t      *qm_dqtrxzone;
 /*
- * Ditto, for xfs_qm_dqreclaim_one.
- */
-#define XFS_QM_RECLAIM_MAX_RESTARTS     4
-/*
- * Ideal ratio of free to in use dquots. Quota manager makes an attempt
- * to keep this balance.
- */
-#define XFS_QM_DQFREE_RATIO             2
-/*
 * Dquot hashtable constants/threshold values.
 */
 #define XFS_QM_HASHSIZE_LOW             (PAGE_SIZE / sizeof(xfs_dqhash_t))
@@ -74,7 +62,6 @@ typedef struct xfs_qm {
        int              qm_dqfrlist_cnt;
        atomic_t         qm_totaldquots; /* total incore dquots */
        uint             qm_nrefs;       /* file systems with quota on */
-        int              qm_dqfree_ratio;/* ratio of free to inuse dquots */
        kmem_zone_t     *qm_dqzone;      /* dquot mem-alloc zone */
        kmem_zone_t     *qm_dqtrxzone;   /* t_dqinfo of transactions */
 } xfs_qm_t;
@@ -143,7 +130,6 @@ extern int		xfs_qm_quotacheck(xfs_mount_t *);
 extern int              xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
 /* dquot stuff */
-extern boolean_t        xfs_qm_dqalloc_incore(xfs_dquot_t **);
 extern int              xfs_qm_dqpurge_all(xfs_mount_t *, uint);
 extern void             xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c
index 8671a0b32644..5729ba570877 100644
--- a/fs/xfs/xfs_qm_stats.c
+++ b/fs/xfs/xfs_qm_stats.c
@@ -42,9 +42,9 @@ static int xqm_proc_show(struct seq_file *m, void *v)
 {
        /* maximum; incore; ratio free to inuse; freelist */
        seq_printf(m, "%d\t%d\t%d\t%u\n",
-                        ndquot,
+                        0,
                        xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
-                        xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
+                        0,
                        xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
        return 0;
 }
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index eafbcff81f3a..711a86e39ff0 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -813,11 +813,11 @@ xfs_qm_export_dquot(
             (XFS_IS_OQUOTA_ENFORCED(mp) &&
                        (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
            dst->d_id != 0) {
-                if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
+                if (((int) dst->d_bcount > (int) dst->d_blk_softlimit) &&
                    (dst->d_blk_softlimit > 0)) {
                        ASSERT(dst->d_btimer != 0);
                }
-                if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
+                if (((int) dst->d_icount > (int) dst->d_ino_softlimit) &&
                    (dst->d_ino_softlimit > 0)) {
                        ASSERT(dst->d_itimer != 0);
                }
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6b6df5802e95..bb134a819930 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -733,11 +733,10 @@ DEFINE_EVENT(xfs_dquot_class, name, \
 DEFINE_DQUOT_EVENT(xfs_dqadjust);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
 DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_busy);
+DEFINE_DQUOT_EVENT(xfs_dqreclaim_done);
 DEFINE_DQUOT_EVENT(xfs_dqattach_found);
 DEFINE_DQUOT_EVENT(xfs_dqattach_get);
-DEFINE_DQUOT_EVENT(xfs_dqinit);
-DEFINE_DQUOT_EVENT(xfs_dqreuse);
 DEFINE_DQUOT_EVENT(xfs_dqalloc);
 DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
 DEFINE_DQUOT_EVENT(xfs_dqread);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 329b06aba1c2..7adcdf15ae0c 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1151,8 +1151,8 @@ xfs_trans_add_item(
 {
        struct xfs_log_item_desc *lidp;
-        ASSERT(lip->li_mountp = tp->t_mountp);
+        ASSERT(lip->li_mountp == tp->t_mountp);
-        ASSERT(lip->li_ailp = tp->t_mountp->m_ail);
+        ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
        lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS);
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 4d00ee67792d..c4ba366d24e6 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -649,12 +649,12 @@ xfs_trans_dqresv(
                         * nblks.
                         */
                        if (hardlimit > 0ULL &&
-                            hardlimit <= nblks + *resbcountp) {
+                            hardlimit < nblks + *resbcountp) {
                                xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
                                goto error_return;
                        }
                        if (softlimit > 0ULL &&
-                            softlimit <= nblks + *resbcountp) {
+                            softlimit < nblks + *resbcountp) {
                                if ((timer != 0 && get_seconds() > timer) ||
                                    (warns != 0 && warns >= warnlimit)) {
                                        xfs_quota_warn(mp, dqp,
@@ -677,11 +677,13 @@ xfs_trans_dqresv(
                        if (!softlimit)
                                softlimit = q->qi_isoftlimit;
-                        if (hardlimit > 0ULL && count >= hardlimit) {
+                        if (hardlimit > 0ULL &&
+                            hardlimit < ninos + count) {
                                xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
                                goto error_return;
                        }
-                        if (softlimit > 0ULL && count >= softlimit) {
+                        if (softlimit > 0ULL &&
+                            softlimit < ninos + count) {
                                if  ((timer != 0 && get_seconds() > timer) ||
                                     (warns != 0 && warns >= warnlimit)) {
                                        xfs_quota_warn(mp, dqp,