Merge branch 'fix/hda' into topic/hda

author: Takashi Iwai <tiwai@suse.de> 2010-11-29 01:44:01 -0500
committer: Takashi Iwai <tiwai@suse.de> 2010-11-29 01:44:01 -0500
commit: ca19e77e44985b5500f5461f7d2f4ce799cb60ce (patch)
tree: 3ba3635ac2f212b332198b14cc3239195c153e67 /fs
parent: 9d57883f08d3c0c111b50bf185dfee9731a12c76 (diff)
parent: ac70eb1305d5a81efd1e32327d7e79be15a63a5a (diff)
78 files changed, 694 insertions, 704 deletions
diff --git a/fs/bio.c b/fs/bio.c
index 8abb2dfb2e7c..4bd454fa844e 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -370,6 +370,9 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
 {
        struct bio *bio;
+        if (nr_iovecs > UIO_MAXIOV)
+                return NULL;
        bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
                      gfp_mask);
        if (unlikely(!bio))
@@ -697,8 +700,12 @@ static void bio_free_map_data(struct bio_map_data *bmd)
 static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
                                               gfp_t gfp_mask)
 {
-        struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
+        struct bio_map_data *bmd;
+        if (iov_count > UIO_MAXIOV)
+                return NULL;
+        bmd = kmalloc(sizeof(*bmd), gfp_mask);
        if (!bmd)
                return NULL;
@@ -827,6 +834,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
                end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
                start = uaddr >> PAGE_SHIFT;
+                /*
+                 * Overflow, abort
+                 */
+                if (end < start)
+                        return ERR_PTR(-EINVAL);
                nr_pages += end - start;
                len += iov[i].iov_len;
        }
@@ -955,6 +968,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
                unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
                unsigned long start = uaddr >> PAGE_SHIFT;
+                /*
+                 * Overflow, abort
+                 */
+                if (end < start)
+                        return ERR_PTR(-EINVAL);
                nr_pages += end - start;
                /*
                 * buffer must be aligned to at least hardsector size for now
@@ -982,7 +1001,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
                unsigned long start = uaddr >> PAGE_SHIFT;
                const int local_nr_pages = end - start;
                const int page_limit = cur_page + local_nr_pages;
-                
                ret = get_user_pages_fast(uaddr, local_nr_pages,
                                write_to_vm, &pages[cur_page]);
                if (ret < local_nr_pages) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 06e8ff12b97c..4230252fd689 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -11,7 +11,6 @@
 #include <linux/slab.h>
 #include <linux/kmod.h>
 #include <linux/major.h>
-#include <linux/smp_lock.h>
 #include <linux/device_cgroup.h>
 #include <linux/highmem.h>
 #include <linux/blkdev.h>
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index e9c874abc9e1..561438b6a50c 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
        err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
                                  page->index << PAGE_CACHE_SHIFT, &len,
                                  ci->i_truncate_seq, ci->i_truncate_size,
-                                  &page, 1);
+                                  &page, 1, 0);
        if (err == -ENOENT)
                err = 0;
        if (err < 0) {
@@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
        rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
                                 offset, &len,
                                 ci->i_truncate_seq, ci->i_truncate_size,
-                                 pages, nr_pages);
+                                 pages, nr_pages, 0);
        if (rc == -ENOENT)
                rc = 0;
        if (rc < 0)
@@ -774,7 +774,7 @@ get_more_pages:
                                            snapc, do_sync,
                                            ci->i_truncate_seq,
                                            ci->i_truncate_size,
-                                            &inode->i_mtime, true, 1);
+                                            &inode->i_mtime, true, 1, 0);
                                max_pages = req->r_num_pages;
                                alloc_page_vec(fsc, req);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 98ab13e2b71d..60d27bc9eb83 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode)
            invalidating_gen == ci->i_rdcache_gen) {
                /* success. */
                dout("try_nonblocking_invalidate %p success\n", inode);
-                ci->i_rdcache_gen = 0;
+                /* save any racing async invalidate some trouble */
-                ci->i_rdcache_revoking = 0;
+                ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
                return 0;
        }
        dout("try_nonblocking_invalidate %p failed\n", inode);
@@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 {
        struct ceph_inode_info *ci = ceph_inode(inode);
        int mds = session->s_mds;
-        unsigned seq = le32_to_cpu(grant->seq);
+        int seq = le32_to_cpu(grant->seq);
-        unsigned issue_seq = le32_to_cpu(grant->issue_seq);
        int newcaps = le32_to_cpu(grant->caps);
        int issued, implemented, used, wanted, dirty;
        u64 size = le64_to_cpu(grant->size);
@@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
        int revoked_rdcache = 0;
        int queue_invalidate = 0;
-        dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n",
+        dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
-             inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps));
+             inode, cap, mds, seq, ceph_cap_string(newcaps));
        dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
                inode->i_size);
@@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
        }
        cap->seq = seq;
-        cap->issue_seq = issue_seq;
        /* file layout may have changed */
        ci->i_layout = grant->layout;
@@ -2691,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
                     NULL /* no caps context */);
        try_flush_caps(inode, session, NULL);
        up_read(&mdsc->snap_rwsem);
+        /* make sure we re-request max_size, if necessary */
+        spin_lock(&inode->i_lock);
+        ci->i_requested_max_size = 0;
+        spin_unlock(&inode->i_lock);
 }
 /*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e0a2dc6fcafc..7d447af84ec4 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -336,7 +336,10 @@ more:
                if (req->r_reply_info.dir_end) {
                        kfree(fi->last_name);
                        fi->last_name = NULL;
-                        fi->next_offset = 2;
+                        if (ceph_frag_is_rightmost(frag))
+                                fi->next_offset = 2;
+                        else
+                                fi->next_offset = 0;
                } else {
                        rinfo = &req->r_reply_info;
                        err = note_last_dentry(fi,
@@ -355,18 +358,22 @@ more:
                u64 pos = ceph_make_fpos(frag, off);
                struct ceph_mds_reply_inode *in =
                        rinfo->dir_in[off - fi->offset].in;
+                struct ceph_vino vino;
+                ino_t ino;
                dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
                     off, off - fi->offset, rinfo->dir_nr, pos,
                     rinfo->dir_dname_len[off - fi->offset],
                     rinfo->dir_dname[off - fi->offset], in);
                BUG_ON(!in);
                ftype = le32_to_cpu(in->mode) >> 12;
+                vino.ino = le64_to_cpu(in->ino);
+                vino.snap = le64_to_cpu(in->snapid);
+                ino = ceph_vino_to_ino(vino);
                if (filldir(dirent,
                            rinfo->dir_dname[off - fi->offset],
                            rinfo->dir_dname_len[off - fi->offset],
-                            pos,
+                            pos, ino, ftype) < 0) {
-                            le64_to_cpu(in->ino),
-                            ftype) < 0) {
                        dout("filldir stopping us...\n");
                        return 0;
                }
@@ -414,6 +421,7 @@ static void reset_readdir(struct ceph_file_info *fi)
                fi->last_readdir = NULL;
        }
        kfree(fi->last_name);
+        fi->last_name = NULL;
        fi->next_offset = 2;  /* compensate for . and .. */
        if (fi->dentry) {
                dput(fi->dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e77c28cf3690..8d79b8912e31 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file)
        }
        /*
-         * No need to block if we have any caps.  Update wanted set
+         * No need to block if we have caps on the auth MDS (for
+         * write) or any MDS (for read).  Update wanted set
         * asynchronously.
         */
        spin_lock(&inode->i_lock);
-        if (__ceph_is_any_real_caps(ci)) {
+        if (__ceph_is_any_real_caps(ci) &&
+            (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
                int mds_wanted = __ceph_caps_mds_wanted(ci);
                int issued = __ceph_caps_issued(ci, NULL);
@@ -280,11 +282,12 @@ int ceph_release(struct inode *inode, struct file *file)
 static int striped_read(struct inode *inode,
                        u64 off, u64 len,
                        struct page **pages, int num_pages,
-                        int *checkeof)
+                        int *checkeof, bool align_to_pages)
 {
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
        struct ceph_inode_info *ci = ceph_inode(inode);
        u64 pos, this_len;
+        int io_align, page_align;
        int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
        int left, pages_left;
        int read;
@@ -300,14 +303,19 @@ static int striped_read(struct inode *inode,
        page_pos = pages;
        pages_left = num_pages;
        read = 0;
+        io_align = off & ~PAGE_MASK;
 more:
+        if (align_to_pages)
+                page_align = (pos - io_align) & ~PAGE_MASK;
+        else
+                page_align = pos & ~PAGE_MASK;
        this_len = left;
        ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
                                  &ci->i_layout, pos, &this_len,
                                  ci->i_truncate_seq,
                                  ci->i_truncate_size,
-                                  page_pos, pages_left);
+                                  page_pos, pages_left, page_align);
        hit_stripe = this_len < left;
        was_short = ret >= 0 && ret < this_len;
        if (ret == -ENOENT)
@@ -374,26 +382,25 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
        dout("sync_read on file %p %llu~%u %s\n", file, off, len,
             (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
-        if (file->f_flags & O_DIRECT) {
+        if (file->f_flags & O_DIRECT)
-                pages = ceph_get_direct_page_vector(data, num_pages, off, len);
+                pages = ceph_get_direct_page_vector(data, num_pages);
+        else
-                /*
-                 * flush any page cache pages in this range.  this
-                 * will make concurrent normal and O_DIRECT io slow,
-                 * but it will at least behave sensibly when they are
-                 * in sequence.
-                 */
-        } else {
                pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
-        }
        if (IS_ERR(pages))
                return PTR_ERR(pages);
+        /*
+         * flush any page cache pages in this range.  this
+         * will make concurrent normal and sync io slow,
+         * but it will at least behave sensibly when they are
+         * in sequence.
+         */
        ret = filemap_write_and_wait(inode->i_mapping);
        if (ret < 0)
                goto done;
-        ret = striped_read(inode, off, len, pages, num_pages, checkeof);
+        ret = striped_read(inode, off, len, pages, num_pages, checkeof,
+                           file->f_flags & O_DIRECT);
        if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
                ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -448,6 +455,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
        int flags;
        int do_sync = 0;
        int check_caps = 0;
+        int page_align, io_align;
        int ret;
        struct timespec mtime = CURRENT_TIME;
@@ -462,6 +470,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
        else
                pos = *offset;
+        io_align = pos & ~PAGE_MASK;
        ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
        if (ret < 0)
                return ret;
@@ -486,20 +496,26 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
         */
 more:
        len = left;
+        if (file->f_flags & O_DIRECT)
+                /* write from beginning of first page, regardless of
+                   io alignment */
+                page_align = (pos - io_align) & ~PAGE_MASK;
+        else
+                page_align = pos & ~PAGE_MASK;
        req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
                                    ceph_vino(inode), pos, &len,
                                    CEPH_OSD_OP_WRITE, flags,
                                    ci->i_snap_realm->cached_context,
                                    do_sync,
                                    ci->i_truncate_seq, ci->i_truncate_size,
-                                    &mtime, false, 2);
+                                    &mtime, false, 2, page_align);
        if (!req)
                return -ENOMEM;
        num_pages = calc_pages_for(pos, len);
        if (file->f_flags & O_DIRECT) {
-                pages = ceph_get_direct_page_vector(data, num_pages, pos, len);
+                pages = ceph_get_direct_page_vector(data, num_pages);
                if (IS_ERR(pages)) {
                        ret = PTR_ERR(pages);
                        goto out;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 1d6a45b5a04c..bf1286588f26 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2,7 +2,6 @@
 #include <linux/module.h>
 #include <linux/fs.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
@@ -471,7 +470,9 @@ void ceph_fill_file_time(struct inode *inode, int issued,
        if (issued & (CEPH_CAP_FILE_EXCL|
                      CEPH_CAP_FILE_WR|
-                      CEPH_CAP_FILE_BUFFER)) {
+                      CEPH_CAP_FILE_BUFFER|
+                      CEPH_CAP_AUTH_EXCL|
+                      CEPH_CAP_XATTR_EXCL)) {
                if (timespec_compare(ctime, &inode->i_ctime) > 0) {
                        dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
                             inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
@@ -511,7 +512,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
                        warn = 1;
                }
        } else {
-                /* we have no write caps; whatever the MDS says is true */
+                /* we have no write|excl caps; whatever the MDS says is true */
                if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
                        inode->i_ctime = *ctime;
                        inode->i_mtime = *mtime;
@@ -567,12 +568,17 @@ static int fill_inode(struct inode *inode,
        /*
         * provided version will be odd if inode value is projected,
-         * even if stable.  skip the update if we have a newer info
+         * even if stable.  skip the update if we have newer stable
-         * (e.g., due to inode info racing form multiple MDSs), or if
+         * info (ours>=theirs, e.g. due to racing mds replies), unless
-         * we are getting projected (unstable) inode info.
+         * we are getting projected (unstable) info (in which case the
+         * version is odd, and we want ours>theirs).
+         *   us   them
+         *   2    2     skip
+         *   3    2     skip
+         *   3    3     update
         */
        if (le64_to_cpu(info->version) > 0 &&
-            (ci->i_version & ~1) > le64_to_cpu(info->version))
+            (ci->i_version & ~1) >= le64_to_cpu(info->version))
                goto no_change;
        issued = __ceph_caps_issued(ci, &implemented);
@@ -606,7 +612,14 @@ static int fill_inode(struct inode *inode,
                            le32_to_cpu(info->time_warp_seq),
                            &ctime, &mtime, &atime);
-        ci->i_max_size = le64_to_cpu(info->max_size);
+        /* only update max_size on auth cap */
+        if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+            ci->i_max_size != le64_to_cpu(info->max_size)) {
+                dout("max_size %lld -> %llu\n", ci->i_max_size,
+                     le64_to_cpu(info->max_size));
+                ci->i_max_size = le64_to_cpu(info->max_size);
+        }
        ci->i_layout = info->layout;
        inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
@@ -1055,7 +1068,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
                ininfo = rinfo->targeti.in;
                vino.ino = le64_to_cpu(ininfo->ino);
                vino.snap = le64_to_cpu(ininfo->snapid);
-                if (!dn->d_inode) {
+                in = dn->d_inode;
+                if (!in) {
                        in = ceph_get_inode(sb, vino);
                        if (IS_ERR(in)) {
                                pr_err("fill_trace bad get_inode "
@@ -1386,11 +1400,8 @@ static void ceph_invalidate_work(struct work_struct *work)
        spin_lock(&inode->i_lock);
        dout("invalidate_pages %p gen %d revoking %d\n", inode,
             ci->i_rdcache_gen, ci->i_rdcache_revoking);
-        if (ci->i_rdcache_gen == 0 ||
+        if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
-            ci->i_rdcache_revoking != ci->i_rdcache_gen) {
-                BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen);
                /* nevermind! */
-                ci->i_rdcache_revoking = 0;
                spin_unlock(&inode->i_lock);
                goto out;
        }
@@ -1400,15 +1411,16 @@ static void ceph_invalidate_work(struct work_struct *work)
        ceph_invalidate_nondirty_pages(inode->i_mapping);
        spin_lock(&inode->i_lock);
-        if (orig_gen == ci->i_rdcache_gen) {
+        if (orig_gen == ci->i_rdcache_gen &&
+            orig_gen == ci->i_rdcache_revoking) {
                dout("invalidate_pages %p gen %d successful\n", inode,
                     ci->i_rdcache_gen);
-                ci->i_rdcache_gen = 0;
+                ci->i_rdcache_revoking--;
-                ci->i_rdcache_revoking = 0;
                check = 1;
        } else {
-                dout("invalidate_pages %p gen %d raced, gen now %d\n",
+                dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
-                     inode, orig_gen, ci->i_rdcache_gen);
+                     inode, orig_gen, ci->i_rdcache_gen,
+                     ci->i_rdcache_revoking);
        }
        spin_unlock(&inode->i_lock);
@@ -1739,7 +1751,7 @@ int ceph_do_getattr(struct inode *inode, int mask)
                return 0;
        }
-        dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask));
+        dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
        if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
                return 0;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3142b15940c2..098b18508479 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -6,7 +6,6 @@
 #include <linux/sched.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 #include "super.h"
 #include "mds_client.h"
@@ -529,6 +528,9 @@ static void __register_request(struct ceph_mds_client *mdsc,
        ceph_mdsc_get_request(req);
        __insert_request(mdsc, req);
+        req->r_uid = current_fsuid();
+        req->r_gid = current_fsgid();
        if (dir) {
                struct ceph_inode_info *ci = ceph_inode(dir);
@@ -1588,8 +1590,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
        head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
        head->op = cpu_to_le32(req->r_op);
-        head->caller_uid = cpu_to_le32(current_fsuid());
+        head->caller_uid = cpu_to_le32(req->r_uid);
-        head->caller_gid = cpu_to_le32(current_fsgid());
+        head->caller_gid = cpu_to_le32(req->r_gid);
        head->args = req->r_args;
        ceph_encode_filepath(&p, end, ino1, path1);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d66d63c72355..9341fd4f1432 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -170,6 +170,8 @@ struct ceph_mds_request {
        union ceph_mds_request_args r_args;
        int r_fmode;        /* file mode, if expecting cap */
+        uid_t r_uid;
+        gid_t r_gid;
        /* for choosing which mds to send this request to */
        int r_direct_mode;
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1886294e12f7..7f01728a4657 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -293,9 +293,7 @@ struct ceph_inode_info {
        int i_rd_ref, i_rdcache_ref, i_wr_ref;
        int i_wrbuffer_ref, i_wrbuffer_ref_head;
        u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
-        u32 i_rdcache_gen;      /* we increment this each time we get
+        u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
-                                   FILE_CACHE.  If it's non-zero, we
-                                   _may_ have cached pages. */
        u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
        struct list_head i_unsafe_writes; /* uncommitted sync writes */
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index 5aff46c61e52..355abcdcda98 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -81,7 +81,7 @@ u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for
 v) mount check for unmatched uids
-w) Add support for new vfs entry points for setlease and fallocate 
+w) Add support for new vfs entry point for fallocate
 x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of 
 processes can proceed better in parallel (on the server)
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 525ba59a4105..e9a393c9c2ca 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -15,7 +15,7 @@
 *   the GNU Lesser General Public License for more details.
 *
 */
-#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
 #ifndef _CIFS_FS_SB_H
 #define _CIFS_FS_SB_H
@@ -42,9 +42,9 @@
 #define CIFS_MOUNT_MULTIUSER    0x20000 /* multiuser mount */
 struct cifs_sb_info {
-        struct radix_tree_root tlink_tree;
+        struct rb_root tlink_tree;
-#define CIFS_TLINK_MASTER_TAG           0       /* is "master" (mount) tcon */
        spinlock_t tlink_tree_lock;
+        struct tcon_link *master_tlink;
        struct nls_table *local_nls;
        unsigned int rsize;
        unsigned int wsize;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 75c4eaa79588..9c3789762ab7 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -116,7 +116,7 @@ cifs_read_super(struct super_block *sb, void *data,
                return -ENOMEM;
        spin_lock_init(&cifs_sb->tlink_tree_lock);
-        INIT_RADIX_TREE(&cifs_sb->tlink_tree, GFP_KERNEL);
+        cifs_sb->tlink_tree = RB_ROOT;
        rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
        if (rc) {
@@ -321,8 +321,7 @@ cifs_alloc_inode(struct super_block *sb)
        /* Until the file is open and we have gotten oplock
        info back from the server, can not assume caching of
        file data or metadata */
-        cifs_inode->clientCanCacheRead = false;
+        cifs_set_oplock_level(cifs_inode, 0);
-        cifs_inode->clientCanCacheAll = false;
        cifs_inode->delete_pending = false;
        cifs_inode->invalid_mapping = false;
        cifs_inode->vfs_inode.i_blkbits = 14;  /* 2**14 = CIFS_MAX_MSGSIZE */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index f259e4d7612d..b577bf0a1bb3 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -336,7 +336,8 @@ struct cifsTconInfo {
 * "get" on the container.
 */
 struct tcon_link {
-        unsigned long           tl_index;
+        struct rb_node          tl_rbnode;
+        uid_t                   tl_uid;
        unsigned long           tl_flags;
 #define TCON_LINK_MASTER        0
 #define TCON_LINK_PENDING       1
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index edb6d90efdf2..7ed69b6b5fe6 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -104,6 +104,7 @@ extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
 extern u64 cifs_UnixTimeToNT(struct timespec);
 extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
                                      int offset);
+extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
 extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle,
                                struct file *file, struct tcon_link *tlink,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 9eb327defa1d..251a17c03545 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -116,6 +116,7 @@ struct smb_vol {
 static int ipv4_connect(struct TCP_Server_Info *server);
 static int ipv6_connect(struct TCP_Server_Info *server);
+static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
 static void cifs_prune_tlinks(struct work_struct *work);
 /*
@@ -2900,24 +2901,16 @@ remote_path_check:
                goto mount_fail_check;
        }
-        tlink->tl_index = pSesInfo->linux_uid;
+        tlink->tl_uid = pSesInfo->linux_uid;
        tlink->tl_tcon = tcon;
        tlink->tl_time = jiffies;
        set_bit(TCON_LINK_MASTER, &tlink->tl_flags);
        set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
-        rc = radix_tree_preload(GFP_KERNEL);
+        cifs_sb->master_tlink = tlink;
-        if (rc == -ENOMEM) {
-                kfree(tlink);
-                goto mount_fail_check;
-        }
        spin_lock(&cifs_sb->tlink_tree_lock);
-        radix_tree_insert(&cifs_sb->tlink_tree, pSesInfo->linux_uid, tlink);
+        tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
-        radix_tree_tag_set(&cifs_sb->tlink_tree, pSesInfo->linux_uid,
-                           CIFS_TLINK_MASTER_TAG);
        spin_unlock(&cifs_sb->tlink_tree_lock);
-        radix_tree_preload_end();
        queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
                                TLINK_IDLE_EXPIRE);
@@ -3107,32 +3100,25 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 int
 cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 {
-        int i, ret;
+        struct rb_root *root = &cifs_sb->tlink_tree;
+        struct rb_node *node;
+        struct tcon_link *tlink;
        char *tmp;
-        struct tcon_link *tlink[8];
-        unsigned long index = 0;
        cancel_delayed_work_sync(&cifs_sb->prune_tlinks);
-        do {
+        spin_lock(&cifs_sb->tlink_tree_lock);
-                spin_lock(&cifs_sb->tlink_tree_lock);
+        while ((node = rb_first(root))) {
-                ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree,
+                tlink = rb_entry(node, struct tcon_link, tl_rbnode);
-                                             (void **)tlink, index,
+                cifs_get_tlink(tlink);
-                                             ARRAY_SIZE(tlink));
+                clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
-                /* increment index for next pass */
+                rb_erase(node, root);
-                if (ret > 0)
-                        index = tlink[ret - 1]->tl_index + 1;
-                for (i = 0; i < ret; i++) {
-                        cifs_get_tlink(tlink[i]);
-                        clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
-                        radix_tree_delete(&cifs_sb->tlink_tree,
-                                                        tlink[i]->tl_index);
-                }
-                spin_unlock(&cifs_sb->tlink_tree_lock);
-                for (i = 0; i < ret; i++)
+                spin_unlock(&cifs_sb->tlink_tree_lock);
-                        cifs_put_tlink(tlink[i]);
+                cifs_put_tlink(tlink);
-        } while (ret != 0);
+                spin_lock(&cifs_sb->tlink_tree_lock);
+        }
+        spin_unlock(&cifs_sb->tlink_tree_lock);
        tmp = cifs_sb->prepath;
        cifs_sb->prepathlen = 0;
@@ -3271,22 +3257,10 @@ out:
        return tcon;
 }
-static struct tcon_link *
+static inline struct tcon_link *
 cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
 {
-        struct tcon_link *tlink;
+        /* master_tlink is stored at mount time, so no tree lookup or lock is needed here */
+        return cifs_sb->master_tlink;
-        unsigned int ret;
-        spin_lock(&cifs_sb->tlink_tree_lock);
-        ret = radix_tree_gang_lookup_tag(&cifs_sb->tlink_tree, (void **)&tlink,
-                                        0, 1, CIFS_TLINK_MASTER_TAG);
-        spin_unlock(&cifs_sb->tlink_tree_lock);
-        /* the master tcon should always be present */
-        if (ret == 0)
-                BUG();
-        return tlink;
 }
 struct cifsTconInfo *
@@ -3302,6 +3276,47 @@ cifs_sb_tcon_pending_wait(void *unused)
        return signal_pending(current) ? -ERESTARTSYS : 0;
 }
+/*
+ * Find and return the tlink with the given uid, or NULL when the tree
+ * holds no entry for that uid.  The tree is ordered by tl_uid.
+ *
+ * No reference is taken on the returned tlink; cifs_sb_tlink() calls
+ * cifs_get_tlink() on the result itself, and makes this lookup with
+ * tlink_tree_lock held.
+ */
+static struct tcon_link *
+tlink_rb_search(struct rb_root *root, uid_t uid)
+{
+        struct rb_node *node = root->rb_node;
+        struct tcon_link *tlink;
+        while (node) {
+                tlink = rb_entry(node, struct tcon_link, tl_rbnode);
+                if (tlink->tl_uid > uid)
+                        node = node->rb_left;   /* wanted uid is smaller */
+                else if (tlink->tl_uid < uid)
+                        node = node->rb_right;  /* wanted uid is larger */
+                else
+                        return tlink;           /* exact match */
+        }
+        return NULL;    /* ran off the tree without a match */
+}
+/*
+ * Insert a tcon_link into the tree, ordered by tl_uid.
+ *
+ * There is no duplicate check: a new entry whose uid equals an existing
+ * one simply descends to the right of it.  The mount path that calls
+ * this does so with tlink_tree_lock held.
+ */
+static void
+tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink)
+{
+        struct rb_node **new = &(root->rb_node), *parent = NULL;
+        struct tcon_link *tlink;
+        /* descend to the empty child slot where the new node belongs */
+        while (*new) {
+                tlink = rb_entry(*new, struct tcon_link, tl_rbnode);
+                parent = *new;
+                if (tlink->tl_uid > new_tlink->tl_uid)
+                        new = &((*new)->rb_left);
+                else
+                        new = &((*new)->rb_right);      /* equal or larger uid */
+        }
+        /* link the node into the slot found above, then rebalance */
+        rb_link_node(&new_tlink->tl_rbnode, parent, new);
+        rb_insert_color(&new_tlink->tl_rbnode, root);
+}
 /*
 * Find or construct an appropriate tcon given a cifs_sb and the fsuid of the
 * current task.
@@ -3309,7 +3324,7 @@ cifs_sb_tcon_pending_wait(void *unused)
 * If the superblock doesn't refer to a multiuser mount, then just return
 * the master tcon for the mount.
 *
- * First, search the radix tree for an existing tcon for this fsuid. If one
+ * First, search the rbtree for an existing tcon for this fsuid. If one
 * exists, then check to see if it's pending construction. If it is then wait
 * for construction to complete. Once it's no longer pending, check to see if
 * it failed and either return an error or retry construction, depending on
@@ -3322,14 +3337,14 @@ struct tcon_link *
 cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
 {
        int ret;
-        unsigned long fsuid = (unsigned long) current_fsuid();
+        uid_t fsuid = current_fsuid();
        struct tcon_link *tlink, *newtlink;
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
        spin_lock(&cifs_sb->tlink_tree_lock);
-        tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid);
+        tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
        if (tlink)
                cifs_get_tlink(tlink);
        spin_unlock(&cifs_sb->tlink_tree_lock);
@@ -3338,36 +3353,24 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
                newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL);
                if (newtlink == NULL)
                        return ERR_PTR(-ENOMEM);
-                newtlink->tl_index = fsuid;
+                newtlink->tl_uid = fsuid;
                newtlink->tl_tcon = ERR_PTR(-EACCES);
                set_bit(TCON_LINK_PENDING, &newtlink->tl_flags);
                set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags);
                cifs_get_tlink(newtlink);
-                ret = radix_tree_preload(GFP_KERNEL);
-                if (ret != 0) {
-                        kfree(newtlink);
-                        return ERR_PTR(ret);
-                }
                spin_lock(&cifs_sb->tlink_tree_lock);
                /* was one inserted after previous search? */
-                tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid);
+                tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
                if (tlink) {
                        cifs_get_tlink(tlink);
                        spin_unlock(&cifs_sb->tlink_tree_lock);
-                        radix_tree_preload_end();
                        kfree(newtlink);
                        goto wait_for_construction;
                }
-                ret = radix_tree_insert(&cifs_sb->tlink_tree, fsuid, newtlink);
-                spin_unlock(&cifs_sb->tlink_tree_lock);
-                radix_tree_preload_end();
-                if (ret) {
-                        kfree(newtlink);
-                        return ERR_PTR(ret);
-                }
                tlink = newtlink;
+                tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
+                spin_unlock(&cifs_sb->tlink_tree_lock);
        } else {
 wait_for_construction:
                ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
@@ -3413,39 +3416,39 @@ cifs_prune_tlinks(struct work_struct *work)
 {
        struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info,
                                                    prune_tlinks.work);
-        struct tcon_link *tlink[8];
+        struct rb_root *root = &cifs_sb->tlink_tree;
-        unsigned long now = jiffies;
+        struct rb_node *node = rb_first(root);
-        unsigned long index = 0;
+        struct rb_node *tmp;
-        int i, ret;
+        struct tcon_link *tlink;
-        do {
+        /*
-                spin_lock(&cifs_sb->tlink_tree_lock);
+         * Because we drop the spinlock in the loop in order to put the tlink
-                ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree,
+         * it's not guarded against removal of links from the tree. The only
-                                             (void **)tlink, index,
+         * places that remove entries from the tree are this function and
-                                             ARRAY_SIZE(tlink));
+         * umounts. Because this function is non-reentrant and is canceled
-                /* increment index for next pass */
+         * before umount can proceed, this is safe.
-                if (ret > 0)
+         */
-                        index = tlink[ret - 1]->tl_index + 1;
+        spin_lock(&cifs_sb->tlink_tree_lock);
-                for (i = 0; i < ret; i++) {
+        node = rb_first(root);
-                        if (test_bit(TCON_LINK_MASTER, &tlink[i]->tl_flags) ||
+        while (node != NULL) {
-                            atomic_read(&tlink[i]->tl_count) != 0 ||
+                tmp = node;
-                            time_after(tlink[i]->tl_time + TLINK_IDLE_EXPIRE,
+                node = rb_next(tmp);
-                                       now)) {
+                tlink = rb_entry(tmp, struct tcon_link, tl_rbnode);
-                                tlink[i] = NULL;
-                                continue;
+                if (test_bit(TCON_LINK_MASTER, &tlink->tl_flags) ||
-                        }
+                    atomic_read(&tlink->tl_count) != 0 ||
-                        cifs_get_tlink(tlink[i]);
+                    time_after(tlink->tl_time + TLINK_IDLE_EXPIRE, jiffies))
-                        clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
+                        continue;
-                        radix_tree_delete(&cifs_sb->tlink_tree,
-                                          tlink[i]->tl_index);
-                }
-                spin_unlock(&cifs_sb->tlink_tree_lock);
-                for (i = 0; i < ret; i++) {
+                cifs_get_tlink(tlink);
-                        if (tlink[i] != NULL)
+                clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
-                                cifs_put_tlink(tlink[i]);
+                rb_erase(tmp, root);
-                }
-        } while (ret != 0);
+                spin_unlock(&cifs_sb->tlink_tree_lock);
+                cifs_put_tlink(tlink);
+                spin_lock(&cifs_sb->tlink_tree_lock);
+        }
+        spin_unlock(&cifs_sb->tlink_tree_lock);
        queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
                                TLINK_IDLE_EXPIRE);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ae82159cf7fa..06c3e83fa387 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -146,12 +146,7 @@ client_can_cache:
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, NULL);
-        if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
+        cifs_set_oplock_level(pCifsInode, oplock);
-                pCifsInode->clientCanCacheAll = true;
-                pCifsInode->clientCanCacheRead = true;
-                cFYI(1, "Exclusive Oplock granted on inode %p", inode);
-        } else if ((oplock & 0xF) == OPLOCK_READ)
-                pCifsInode->clientCanCacheRead = true;
        return rc;
 }
@@ -253,12 +248,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
                list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
        spin_unlock(&cifs_file_list_lock);
-        if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
+        cifs_set_oplock_level(pCifsInode, oplock);
-                pCifsInode->clientCanCacheAll = true;
-                pCifsInode->clientCanCacheRead = true;
-                cFYI(1, "Exclusive Oplock inode %p", inode);
-        } else if ((oplock & 0xF) == OPLOCK_READ)
-                pCifsInode->clientCanCacheRead = true;
        file->private_data = pCifsFile;
        return pCifsFile;
@@ -271,8 +261,9 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
 */
 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 {
+        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink);
-        struct cifsInodeInfo *cifsi = CIFS_I(cifs_file->dentry->d_inode);
+        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct cifsLockInfo *li, *tmp;
        spin_lock(&cifs_file_list_lock);
@@ -288,8 +279,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
        if (list_empty(&cifsi->openFileList)) {
                cFYI(1, "closing last open instance for inode %p",
                        cifs_file->dentry->d_inode);
-                cifsi->clientCanCacheRead = false;
+                cifs_set_oplock_level(cifsi, 0);
-                cifsi->clientCanCacheAll  = false;
        }
        spin_unlock(&cifs_file_list_lock);
@@ -607,8 +597,6 @@ reopen_success:
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);
-                pCifsInode->clientCanCacheAll = false;
-                pCifsInode->clientCanCacheRead = false;
                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode,
                                full_path, inode->i_sb, xid);
@@ -622,18 +610,9 @@ reopen_success:
             invalidate the current end of file on the server
             we can not go to the server to get the new inod
             info */
-        if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
-                pCifsInode->clientCanCacheAll = true;
+        cifs_set_oplock_level(pCifsInode, oplock);
-                pCifsInode->clientCanCacheRead = true;
-                cFYI(1, "Exclusive Oplock granted on inode %p",
-                         pCifsFile->dentry->d_inode);
-        } else if ((oplock & 0xF) == OPLOCK_READ) {
-                pCifsInode->clientCanCacheRead = true;
-                pCifsInode->clientCanCacheAll = false;
-        } else {
-                pCifsInode->clientCanCacheRead = false;
-                pCifsInode->clientCanCacheAll = false;
-        }
        cifs_relock_file(pCifsFile);
 reopen_error_exit:
@@ -775,12 +754,6 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
        tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink);
-        if (file->private_data == NULL) {
-                rc = -EBADF;
-                FreeXid(xid);
-                return rc;
-        }
        netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
        if ((tcon->ses->capabilities & CAP_UNIX) &&
@@ -956,6 +929,7 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
        size_t write_size, loff_t *poffset)
 {
+        struct inode *inode = file->f_path.dentry->d_inode;
        int rc = 0;
        unsigned int bytes_written = 0;
        unsigned int total_written;
@@ -963,7 +937,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
        struct cifsTconInfo *pTcon;
        int xid, long_op;
        struct cifsFileInfo *open_file;
-        struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
+        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
@@ -1029,21 +1003,17 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
        cifs_stats_bytes_written(pTcon, total_written);
-        /* since the write may have blocked check these pointers again */
-        if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
-                struct inode *inode = file->f_path.dentry->d_inode;
 /* Do not update local mtime - server will set its actual value on write
- *              inode->i_ctime = inode->i_mtime =
+ *      inode->i_ctime = inode->i_mtime =
- *                      current_fs_time(inode->i_sb);*/
+ *              current_fs_time(inode->i_sb);*/
-                if (total_written > 0) {
+        if (total_written > 0) {
-                        spin_lock(&inode->i_lock);
+                spin_lock(&inode->i_lock);
-                        if (*poffset > file->f_path.dentry->d_inode->i_size)
+                if (*poffset > inode->i_size)
-                                i_size_write(file->f_path.dentry->d_inode,
+                        i_size_write(inode, *poffset);
-                                        *poffset);
+                spin_unlock(&inode->i_lock);
-                        spin_unlock(&inode->i_lock);
-                }
-                mark_inode_dirty_sync(file->f_path.dentry->d_inode);
        }
+        mark_inode_dirty_sync(inode);
        FreeXid(xid);
        return total_written;
 }
@@ -1178,7 +1148,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
                                        bool fsuid_only)
 {
        struct cifsFileInfo *open_file;
-        struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+        struct cifs_sb_info *cifs_sb;
        bool any_available = false;
        int rc;
@@ -1192,6 +1162,8 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
                return NULL;
        }
+        cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
        /* only filter by fsuid on multiuser mounts */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
                fsuid_only = false;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 39869c3c3efb..ef3a55bf86b6 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2177,7 +2177,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
        setattr_copy(inode, attrs);
        mark_inode_dirty(inode);
-        return 0;
 cifs_setattr_exit:
        kfree(full_path);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 077bf756f342..0c98672d0122 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -38,10 +38,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
        struct cifs_sb_info *cifs_sb;
 #ifdef CONFIG_CIFS_POSIX
        struct cifsFileInfo *pSMBFile = filep->private_data;
-        struct cifsTconInfo *tcon = tlink_tcon(pSMBFile->tlink);
+        struct cifsTconInfo *tcon;
        __u64   ExtAttrBits = 0;
        __u64   ExtAttrMask = 0;
-        __u64   caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
+        __u64   caps;
 #endif /* CONFIG_CIFS_POSIX */
        xid = GetXid();
@@ -62,9 +62,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                        break;
 #ifdef CONFIG_CIFS_POSIX
                case FS_IOC_GETFLAGS:
+                        if (pSMBFile == NULL)
+                                break;
+                        tcon = tlink_tcon(pSMBFile->tlink);
+                        caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
                        if (CIFS_UNIX_EXTATTR_CAP & caps) {
-                                if (pSMBFile == NULL)
-                                        break;
                                rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid,
                                        &ExtAttrBits, &ExtAttrMask);
                                if (rc == 0)
@@ -75,13 +77,15 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
                        break;
                case FS_IOC_SETFLAGS:
+                        if (pSMBFile == NULL)
+                                break;
+                        tcon = tlink_tcon(pSMBFile->tlink);
+                        caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
                        if (CIFS_UNIX_EXTATTR_CAP & caps) {
                                if (get_user(ExtAttrBits, (int __user *)arg)) {
                                        rc = -EFAULT;
                                        break;
                                }
-                                if (pSMBFile == NULL)
-                                        break;
                                /* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
                                        extAttrBits, &ExtAttrMask);*/
                        }
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index c4e296fe3518..43f10281bc19 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -569,10 +569,9 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
                                cFYI(1, "file id match, oplock break");
                                pCifsInode = CIFS_I(netfile->dentry->d_inode);
-                                pCifsInode->clientCanCacheAll = false;
-                                if (pSMB->OplockLevel == 0)
-                                        pCifsInode->clientCanCacheRead = false;
+                                cifs_set_oplock_level(pCifsInode,
+                                                      pSMB->OplockLevel);
                                /*
                                 * cifs_oplock_break_put() can't be called
                                 * from here.  Get reference after queueing
@@ -722,3 +721,23 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
                           cifs_sb_master_tcon(cifs_sb)->treeName);
        }
 }
+void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
+{
+        oplock &= 0xF;
+        if (oplock == OPLOCK_EXCLUSIVE) {
+                cinode->clientCanCacheAll = true;
+                cinode->clientCanCacheRead = true;
+                cFYI(1, "Exclusive Oplock granted on inode %p",
+                     &cinode->vfs_inode);
+        } else if (oplock == OPLOCK_READ) {
+                cinode->clientCanCacheAll = false;
+                cinode->clientCanCacheRead = true;
+                cFYI(1, "Level II Oplock granted on inode %p",
+                    &cinode->vfs_inode);
+        } else {
+                cinode->clientCanCacheAll = false;
+                cinode->clientCanCacheRead = false;
+        }
+}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 410ed188faa1..a60579b007b0 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -19,7 +19,6 @@
 #include <linux/compiler.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/ioctl.h>
 #include <linux/if.h>
 #include <linux/if_bridge.h>
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index 253732382d37..2720178b7718 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -28,7 +28,6 @@
 #include <linux/key.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 #include <linux/file.h>
 #include <linux/crypto.h>
 #include "ecryptfs_kernel.h"
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2fedaf8b5012..acf8695fa8f0 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -27,7 +27,6 @@
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/parser.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/exportfs.h>
 #include <linux/vfs.h>
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8b5dd6369f82..6a5edea2d70b 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -177,7 +177,7 @@ struct mpage_da_data {
 struct ext4_io_page {
        struct page     *p_page;
-        int             p_count;
+        atomic_t        p_count;
 };
 #define MAX_IO_PAGES 128
@@ -858,6 +858,7 @@ struct ext4_inode_info {
        spinlock_t i_completed_io_lock;
        /* current io_end structure for async DIO write*/
        ext4_io_end_t *cur_aio_dio;
+        atomic_t i_ioend_count; /* Number of outstanding io_end structs */
        /*
         * Transactions that contain inode's metadata needed to complete
@@ -2060,6 +2061,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
 extern void ext4_exit_pageio(void);
+extern void ext4_ioend_wait(struct inode *);
 extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
 extern int ext4_end_io_nolock(ext4_io_end_t *io);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 191616470466..bdbe69902207 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -53,6 +53,7 @@
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
                                              loff_t new_size)
 {
+        trace_ext4_begin_ordered_truncate(inode, new_size);
        return jbd2_journal_begin_ordered_truncate(
                                        EXT4_SB(inode->i_sb)->s_journal,
                                        &EXT4_I(inode)->jinode,
@@ -178,6 +179,7 @@ void ext4_evict_inode(struct inode *inode)
        handle_t *handle;
        int err;
+        trace_ext4_evict_inode(inode);
        if (inode->i_nlink) {
                truncate_inode_pages(&inode->i_data, 0);
                goto no_delete;
@@ -5410,9 +5412,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
         * will return the blocks that include the delayed allocation
         * blocks for this file.
         */
-        spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
        delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
-        spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
        stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
        return 0;
@@ -5649,6 +5649,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
        int err, ret;
        might_sleep();
+        trace_ext4_mark_inode_dirty(inode, _RET_IP_);
        err = ext4_reserve_inode_write(handle, inode, &iloc);
        if (ext4_handle_valid(handle) &&
            EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bf5ae883b1bd..eb3bc2fe647e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -331,6 +331,30 @@ mext_out:
                return err;
        }
+        case FITRIM:
+        {
+                struct super_block *sb = inode->i_sb;
+                struct fstrim_range range;
+                int ret = 0;
+                if (!capable(CAP_SYS_ADMIN))
+                        return -EPERM;
+                if (copy_from_user(&range, (struct fstrim_range *)arg,
+                    sizeof(range)))
+                        return -EFAULT;
+                ret = ext4_trim_fs(sb, &range);
+                if (ret < 0)
+                        return ret;
+                if (copy_to_user((struct fstrim_range *)arg, &range,
+                    sizeof(range)))
+                        return -EFAULT;
+                return 0;
+        }
        default:
                return -ENOTTY;
        }
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c58eba34724a..5b4d4e3a4d58 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4640,8 +4640,6 @@ do_more:
                 * with group lock held. generate_buddy look at
                 * them with group lock_held
                 */
-                if (test_opt(sb, DISCARD))
-                        ext4_issue_discard(sb, block_group, bit, count);
                ext4_lock_group(sb, block_group);
                mb_clear_bits(bitmap_bh->b_data, bit, count);
                mb_free_blocks(inode, &e4b, bit, count);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 46a7d6a9d976..beacce11ac50 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,8 +32,14 @@
 static struct kmem_cache *io_page_cachep, *io_end_cachep;
+#define WQ_HASH_SZ              37
+#define to_ioend_wq(v)  (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
+static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
 int __init ext4_init_pageio(void)
 {
+        int i;
        io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
        if (io_page_cachep == NULL)
                return -ENOMEM;
@@ -42,6 +48,8 @@ int __init ext4_init_pageio(void)
                kmem_cache_destroy(io_page_cachep);
                return -ENOMEM;
        }
+        for (i = 0; i < WQ_HASH_SZ; i++)
+                init_waitqueue_head(&ioend_wq[i]);
        return 0;
 }
@@ -52,24 +60,37 @@ void ext4_exit_pageio(void)
        kmem_cache_destroy(io_page_cachep);
 }
+void ext4_ioend_wait(struct inode *inode)
+{
+        wait_queue_head_t *wq = to_ioend_wq(inode);
+        wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
+}
+static void put_io_page(struct ext4_io_page *io_page)
+{
+        if (atomic_dec_and_test(&io_page->p_count)) {
+                end_page_writeback(io_page->p_page);
+                put_page(io_page->p_page);
+                kmem_cache_free(io_page_cachep, io_page);
+        }
+}
 void ext4_free_io_end(ext4_io_end_t *io)
 {
        int i;
+        wait_queue_head_t *wq;
        BUG_ON(!io);
        if (io->page)
                put_page(io->page);
-        for (i = 0; i < io->num_io_pages; i++) {
+        for (i = 0; i < io->num_io_pages; i++)
-                if (--io->pages[i]->p_count == 0) {
+                put_io_page(io->pages[i]);
-                        struct page *page = io->pages[i]->p_page;
-                        end_page_writeback(page);
-                        put_page(page);
-                        kmem_cache_free(io_page_cachep, io->pages[i]);
-                }
-        }
        io->num_io_pages = 0;
-        iput(io->inode);
+        wq = to_ioend_wq(io->inode);
+        if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
+            waitqueue_active(wq))
+                wake_up_all(wq);
        kmem_cache_free(io_end_cachep, io);
 }
@@ -142,8 +163,8 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
        io = kmem_cache_alloc(io_end_cachep, flags);
        if (io) {
                memset(io, 0, sizeof(*io));
-                io->inode = igrab(inode);
+                atomic_inc(&EXT4_I(inode)->i_ioend_count);
-                BUG_ON(!io->inode);
+                io->inode = inode;
                INIT_WORK(&io->work, ext4_end_io_work);
                INIT_LIST_HEAD(&io->list);
        }
@@ -171,35 +192,15 @@ static void ext4_end_bio(struct bio *bio, int error)
        struct workqueue_struct *wq;
        struct inode *inode;
        unsigned long flags;
-        ext4_fsblk_t err_block;
        int i;
        BUG_ON(!io_end);
-        inode = io_end->inode;
        bio->bi_private = NULL;
        bio->bi_end_io = NULL;
        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
                error = 0;
-        err_block = bio->bi_sector >> (inode->i_blkbits - 9);
        bio_put(bio);
-        if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
-                pr_err("sb umounted, discard end_io request for inode %lu\n",
-                        io_end->inode->i_ino);
-                ext4_free_io_end(io_end);
-                return;
-        }
-        if (error) {
-                io_end->flag |= EXT4_IO_END_ERROR;
-                ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
-                             "(offset %llu size %ld starting block %llu)",
-                             inode->i_ino,
-                             (unsigned long long) io_end->offset,
-                             (long) io_end->size,
-                             (unsigned long long) err_block);
-        }
        for (i = 0; i < io_end->num_io_pages; i++) {
                struct page *page = io_end->pages[i]->p_page;
                struct buffer_head *bh, *head;
@@ -236,14 +237,6 @@ static void ext4_end_bio(struct bio *bio, int error)
                        } while (bh != head);
                }
-                if (--io_end->pages[i]->p_count == 0) {
-                        struct page *page = io_end->pages[i]->p_page;
-                        end_page_writeback(page);
-                        put_page(page);
-                        kmem_cache_free(io_page_cachep, io_end->pages[i]);
-                }
                /*
                 * If this is a partial write which happened to make
                 * all buffers uptodate then we can optimize away a
@@ -253,9 +246,22 @@ static void ext4_end_bio(struct bio *bio, int error)
                 */
                if (!partial_write)
                        SetPageUptodate(page);
-        }
+                put_io_page(io_end->pages[i]);
+        }
        io_end->num_io_pages = 0;
+        inode = io_end->inode;
+        if (error) {
+                io_end->flag |= EXT4_IO_END_ERROR;
+                ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+                             "(offset %llu size %ld starting block %llu)",
+                             inode->i_ino,
+                             (unsigned long long) io_end->offset,
+                             (long) io_end->size,
+                             (unsigned long long)
+                             bio->bi_sector >> (inode->i_blkbits - 9));
+        }
        /* Add the io_end to per-inode completed io list*/
        spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -305,7 +311,6 @@ static int io_submit_init(struct ext4_io_submit *io,
        bio->bi_private = io->io_end = io_end;
        bio->bi_end_io = ext4_end_bio;
-        io_end->inode = inode;
        io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
        io->io_bio = bio;
@@ -360,7 +365,7 @@ submit_and_retry:
        if ((io_end->num_io_pages == 0) ||
            (io_end->pages[io_end->num_io_pages-1] != io_page)) {
                io_end->pages[io_end->num_io_pages++] = io_page;
-                io_page->p_count++;
+                atomic_inc(&io_page->p_count);
        }
        return 0;
 }
@@ -389,7 +394,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
                return -ENOMEM;
        }
        io_page->p_page = page;
-        io_page->p_count = 0;
+        atomic_set(&io_page->p_count, 1);
        get_page(page);
        for (bh = head = page_buffers(page), block_start = 0;
@@ -421,10 +426,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
         * PageWriteback bit from the page to prevent the system from
         * wedging later on.
         */
-        if (io_page->p_count == 0) {
+        put_io_page(io_page);
-                put_page(page);
-                end_page_writeback(page);
-                kmem_cache_free(io_page_cachep, io_page);
-        }
        return ret;
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 40131b777af6..e32195d6aac3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -828,12 +828,22 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        ei->cur_aio_dio = NULL;
        ei->i_sync_tid = 0;
        ei->i_datasync_tid = 0;
+        atomic_set(&ei->i_ioend_count, 0);
        return &ei->vfs_inode;
 }
+static int ext4_drop_inode(struct inode *inode)
+{
+        int drop = generic_drop_inode(inode);
+        trace_ext4_drop_inode(inode, drop);
+        return drop;
+}
 static void ext4_destroy_inode(struct inode *inode)
 {
+        ext4_ioend_wait(inode);
        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
                ext4_msg(inode->i_sb, KERN_ERR,
                         "Inode %lu (%p): orphan list check failed!",
@@ -1173,6 +1183,7 @@ static const struct super_operations ext4_sops = {
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
+        .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .put_super      = ext4_put_super,
        .sync_fs        = ext4_sync_fs,
@@ -1186,7 +1197,6 @@ static const struct super_operations ext4_sops = {
        .quota_write    = ext4_quota_write,
 #endif
        .bdev_try_to_free_page = bdev_try_to_free_page,
-        .trim_fs        = ext4_trim_fs
 };
 static const struct super_operations ext4_nojournal_sops = {
@@ -1194,6 +1204,7 @@ static const struct super_operations ext4_nojournal_sops = {
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
+        .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .write_super    = ext4_write_super,
        .put_super      = ext4_put_super,
@@ -2699,7 +2710,6 @@ static int ext4_lazyinit_thread(void *arg)
        struct ext4_li_request *elr;
        unsigned long next_wakeup;
        DEFINE_WAIT(wait);
-        int ret;
        BUG_ON(NULL == eli);
@@ -2723,13 +2733,12 @@ cont_thread:
                        elr = list_entry(pos, struct ext4_li_request,
                                         lr_request);
-                        if (time_after_eq(jiffies, elr->lr_next_sched))
+                        if (time_after_eq(jiffies, elr->lr_next_sched)) {
-                                ret = ext4_run_li_request(elr);
+                                if (ext4_run_li_request(elr) != 0) {
+                                        /* error, remove the lazy_init job */
-                        if (ret) {
+                                        ext4_remove_li_request(elr);
-                                ret = 0;
+                                        continue;
-                                ext4_remove_li_request(elr);
+                                }
-                                continue;
                        }
                        if (time_before(elr->lr_next_sched, next_wakeup))
@@ -2740,7 +2749,8 @@ cont_thread:
                if (freezing(current))
                        refrigerator();
-                if (time_after_eq(jiffies, next_wakeup)) {
+                if ((time_after_eq(jiffies, next_wakeup)) ||
+                    (MAX_JIFFY_OFFSET == next_wakeup)) {
                        cond_resched();
                        continue;
                }
@@ -2788,9 +2798,6 @@ static void ext4_clear_request_list(void)
        struct ext4_li_request *elr;
        mutex_lock(&ext4_li_info->li_list_mtx);
-        if (list_empty(&ext4_li_info->li_request_list))
-                return;
        list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
                elr = list_entry(pos, struct ext4_li_request,
                                 lr_request);
@@ -3257,13 +3264,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         * Test whether we have more sectors than will fit in sector_t,
         * and whether the max offset is addressable by the page cache.
         */
-        ret = generic_check_addressable(sb->s_blocksize_bits,
+        err = generic_check_addressable(sb->s_blocksize_bits,
                                        ext4_blocks_count(es));
-        if (ret) {
+        if (err) {
                ext4_msg(sb, KERN_ERR, "filesystem"
                         " too large to mount safely on this system");
                if (sizeof(sector_t) < 8)
                        ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
+                ret = err;
                goto failed_mount;
        }
@@ -3348,6 +3356,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
+        err = percpu_counter_init(&sbi->s_freeblocks_counter,
+                        ext4_count_free_blocks(sb));
+        if (!err) {
+                err = percpu_counter_init(&sbi->s_freeinodes_counter,
+                                ext4_count_free_inodes(sb));
+        }
+        if (!err) {
+                err = percpu_counter_init(&sbi->s_dirs_counter,
+                                ext4_count_dirs(sb));
+        }
+        if (!err) {
+                err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+        }
+        if (err) {
+                ext4_msg(sb, KERN_ERR, "insufficient memory");
+                goto failed_mount3;
+        }
        sbi->s_stripe = ext4_get_stripe_size(sbi);
        sbi->s_max_writeback_mb_bump = 128;
@@ -3446,22 +3472,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        }
        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
-no_journal:
+        /*
-        err = percpu_counter_init(&sbi->s_freeblocks_counter,
+         * The journal may have updated the bg summary counts, so we
-                                  ext4_count_free_blocks(sb));
+         * need to update the global counters.
-        if (!err)
+         */
-                err = percpu_counter_init(&sbi->s_freeinodes_counter,
+        percpu_counter_set(&sbi->s_freeblocks_counter,
-                                          ext4_count_free_inodes(sb));
+                           ext4_count_free_blocks(sb));
-        if (!err)
+        percpu_counter_set(&sbi->s_freeinodes_counter,
-                err = percpu_counter_init(&sbi->s_dirs_counter,
+                           ext4_count_free_inodes(sb));
-                                          ext4_count_dirs(sb));
+        percpu_counter_set(&sbi->s_dirs_counter,
-        if (!err)
+                           ext4_count_dirs(sb));
-                err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+        percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
-        if (err) {
-                ext4_msg(sb, KERN_ERR, "insufficient memory");
-                goto failed_mount_wq;
-        }
+no_journal:
        EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
        if (!EXT4_SB(sb)->dio_unwritten_wq) {
                printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3611,10 +3634,6 @@ failed_mount_wq:
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        }
-        percpu_counter_destroy(&sbi->s_freeblocks_counter);
-        percpu_counter_destroy(&sbi->s_freeinodes_counter);
-        percpu_counter_destroy(&sbi->s_dirs_counter);
-        percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount3:
        if (sbi->s_flex_groups) {
                if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3622,6 +3641,10 @@ failed_mount3:
                else
                        kfree(sbi->s_flex_groups);
        }
+        percpu_counter_destroy(&sbi->s_freeblocks_counter);
+        percpu_counter_destroy(&sbi->s_freeinodes_counter);
+        percpu_counter_destroy(&sbi->s_dirs_counter);
+        percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
@@ -3949,13 +3972,11 @@ static int ext4_commit_super(struct super_block *sb, int sync)
        else
                es->s_kbytes_written =
                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
-        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
+        ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
-                ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
+                                           &EXT4_SB(sb)->s_freeblocks_counter));
-                                        &EXT4_SB(sb)->s_freeblocks_counter));
+        es->s_free_inodes_count =
-        if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
+                cpu_to_le32(percpu_counter_sum_positive(
-                es->s_free_inodes_count =
+                                &EXT4_SB(sb)->s_freeinodes_counter));
-                        cpu_to_le32(percpu_counter_sum_positive(
-                                        &EXT4_SB(sb)->s_freeinodes_counter));
        sb->s_dirt = 0;
        BUFFER_TRACE(sbh, "marking dirty");
        mark_buffer_dirty(sbh);
@@ -4556,12 +4577,10 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 static int ext4_quota_off(struct super_block *sb, int type)
 {
-        /* Force all delayed allocation blocks to be allocated */
+        /* Force all delayed allocation blocks to be allocated.
-        if (test_opt(sb, DELALLOC)) {
+         * Caller already holds s_umount sem */
-                down_read(&sb->s_umount);
+        if (test_opt(sb, DELALLOC))
                sync_filesystem(sb);
-                up_read(&sb->s_umount);
-        }
        return dquot_quota_off(sb, type);
 }
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 06d582732d34..5ab3839dfcb9 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -138,10 +138,8 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
                                      struct gfs2_inum_host *inum)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
-        struct gfs2_holder i_gh;
        struct inode *inode;
        struct dentry *dentry;
-        int error;
        inode = gfs2_ilookup(sb, inum->no_addr);
        if (inode) {
@@ -152,52 +150,16 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
                goto out_inode;
        }
-        error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
+        inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino,
-                                  LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+                                    GFS2_BLKST_DINODE);
-        if (error)
+        if (IS_ERR(inode))
-                return ERR_PTR(error);
+                return ERR_CAST(inode);
-        error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE);
-        if (error)
-                goto fail;
-        inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0);
-        if (IS_ERR(inode)) {
-                error = PTR_ERR(inode);
-                goto fail;
-        }
-        error = gfs2_inode_refresh(GFS2_I(inode));
-        if (error) {
-                iput(inode);
-                goto fail;
-        }
-        /* Pick up the works we bypass in gfs2_inode_lookup */
-        if (inode->i_state & I_NEW) 
-                gfs2_set_iop(inode);
-        if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
-                iput(inode);
-                goto fail;
-        }
-        error = -EIO;
-        if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) {
-                iput(inode);
-                goto fail;
-        }
-        gfs2_glock_dq_uninit(&i_gh);
 out_inode:
        dentry = d_obtain_alias(inode);
        if (!IS_ERR(dentry))
                dentry->d_op = &gfs2_dops;
        return dentry;
-fail:
-        gfs2_glock_dq_uninit(&i_gh);
-        return ERR_PTR(error);
 }
 static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 87778857f099..f92c17704169 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -686,21 +686,20 @@ static void delete_work_func(struct work_struct *work)
 {
        struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
        struct gfs2_sbd *sdp = gl->gl_sbd;
-        struct gfs2_inode *ip = NULL;
+        struct gfs2_inode *ip;
        struct inode *inode;
-        u64 no_addr = 0;
+        u64 no_addr = gl->gl_name.ln_number;
+        ip = gl->gl_object;
+        /* Note: Unsafe to dereference ip as we don't hold right refs/locks */
-        spin_lock(&gl->gl_spin);
-        ip = (struct gfs2_inode *)gl->gl_object;
        if (ip)
-                no_addr = ip->i_no_addr;
-        spin_unlock(&gl->gl_spin);
-        if (ip) {
                inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
-                if (inode) {
+        else
-                        d_prune_aliases(inode);
+                inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
-                        iput(inode);
+        if (inode && !IS_ERR(inode)) {
-                }
+                d_prune_aliases(inode);
+                iput(inode);
        }
        gfs2_glock_put(gl);
 }
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 06370f8bd8cf..e1213f7f9217 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -73,49 +73,6 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
        return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
 }
-struct gfs2_skip_data {
-        u64     no_addr;
-        int     skipped;
-};
-static int iget_skip_test(struct inode *inode, void *opaque)
-{
-        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_skip_data *data = opaque;
-        if (ip->i_no_addr == data->no_addr) {
-                if (inode->i_state & (I_FREEING|I_WILL_FREE)){
-                        data->skipped = 1;
-                        return 0;
-                }
-                return 1;
-        }
-        return 0;
-}
-static int iget_skip_set(struct inode *inode, void *opaque)
-{
-        struct gfs2_inode *ip = GFS2_I(inode);
-        struct gfs2_skip_data *data = opaque;
-        if (data->skipped)
-                return 1;
-        inode->i_ino = (unsigned long)(data->no_addr);
-        ip->i_no_addr = data->no_addr;
-        return 0;
-}
-static struct inode *gfs2_iget_skip(struct super_block *sb,
-                                    u64 no_addr)
-{
-        struct gfs2_skip_data data;
-        unsigned long hash = (unsigned long)no_addr;
-        data.no_addr = no_addr;
-        data.skipped = 0;
-        return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
-}
 /**
 * GFS2 lookup code fills in vfs inode contents based on info obtained
 * from directory entry inside gfs2_inode_lookup(). This has caused issues
@@ -243,93 +200,54 @@ fail:
        return ERR_PTR(error);
 }
-/**
+struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
- * gfs2_process_unlinked_inode - Lookup an unlinked inode for reclamation
+                                  u64 *no_formal_ino, unsigned int blktype)
- *                               and try to reclaim it by doing iput.
- *
- * This function assumes no rgrp locks are currently held.
- *
- * @sb: The super block
- * no_addr: The inode number
- *
- */
-void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
 {
-        struct gfs2_sbd *sdp;
+        struct super_block *sb = sdp->sd_vfs;
-        struct gfs2_inode *ip;
+        struct gfs2_holder i_gh;
-        struct gfs2_glock *io_gl = NULL;
-        int error;
-        struct gfs2_holder gh;
        struct inode *inode;
+        int error;
-        inode = gfs2_iget_skip(sb, no_addr);
+        error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
+                                  LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-        if (!inode)
+        if (error)
-                return;
+                return ERR_PTR(error);
-        /* If it's not a new inode, someone's using it, so leave it alone. */
-        if (!(inode->i_state & I_NEW)) {
-                iput(inode);
-                return;
-        }
-        ip = GFS2_I(inode);
-        sdp = GFS2_SB(inode);
-        ip->i_no_formal_ino = -1;
-        error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
+        error = gfs2_check_blk_type(sdp, no_addr, blktype);
-        if (unlikely(error))
+        if (error)
                goto fail;
-        ip->i_gl->gl_object = ip;
-        error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
+        inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
-        if (unlikely(error))
+        if (IS_ERR(inode))
-                goto fail_put;
+                goto fail;
-        set_bit(GIF_INVALID, &ip->i_flags);
-        error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT,
-                                   &ip->i_iopen_gh);
-        if (unlikely(error))
-                goto fail_iopen;
-        ip->i_iopen_gh.gh_gl->gl_object = ip;
+        error = gfs2_inode_refresh(GFS2_I(inode));
-        gfs2_glock_put(io_gl);
+        if (error)
-        io_gl = NULL;
+                goto fail_iput;
-        inode->i_mode = DT2IF(DT_UNKNOWN);
+        /* Pick up the works we bypass in gfs2_inode_lookup */
+        if (inode->i_state & I_NEW) 
+                gfs2_set_iop(inode);
-        /*
+        /* Two extra checks for NFS only */
-         * We must read the inode in order to work out its type in
+        if (no_formal_ino) {
-         * this case. Note that this doesn't happen often as we normally
+                error = -ESTALE;
-         * know the type beforehand. This code path only occurs during
+                if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino)
-         * unlinked inode recovery (where it is safe to do this glock,
+                        goto fail_iput;
-         * which is not true in the general case).
-         */
-        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY,
-                                   &gh);
-        if (unlikely(error))
-                goto fail_glock;
-        /* Inode is now uptodate */
+                error = -EIO;
-        gfs2_glock_dq_uninit(&gh);
+                if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM)
-        gfs2_set_iop(inode);
+                        goto fail_iput;
-        /* The iput will cause it to be deleted. */
+                error = 0;
-        iput(inode);
+        }
-        return;
-fail_glock:
-        gfs2_glock_dq(&ip->i_iopen_gh);
-fail_iopen:
-        if (io_gl)
-                gfs2_glock_put(io_gl);
-fail_put:
-        ip->i_gl->gl_object = NULL;
-        gfs2_glock_put(ip->i_gl);
 fail:
-        iget_failed(inode);
+        gfs2_glock_dq_uninit(&i_gh);
-        return;
+        return error ? ERR_PTR(error) : inode;
+fail_iput:
+        iput(inode);
+        goto fail;
 }
 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 6720d7d5fbc6..d8499fadcc53 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -99,7 +99,9 @@ err:
 extern void gfs2_set_iop(struct inode *inode);
 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
                                       u64 no_addr, u64 no_formal_ino);
-extern void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr);
+extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
+                                         u64 *no_formal_ino,
+                                         unsigned int blktype);
 extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
 extern int gfs2_inode_refresh(struct gfs2_inode *ip);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bef3ab6cf5c1..33c8407b876f 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -963,17 +963,18 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
 *          The inode, if one has been found, in inode.
 */
-static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
+static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
-                           u64 skip)
 {
        u32 goal = 0, block;
        u64 no_addr;
        struct gfs2_sbd *sdp = rgd->rd_sbd;
        unsigned int n;
+        struct gfs2_glock *gl;
+        struct gfs2_inode *ip;
+        int error;
+        int found = 0;
-        for(;;) {
+        while (goal < rgd->rd_data) {
-                if (goal >= rgd->rd_data)
-                        break;
                down_write(&sdp->sd_log_flush_lock);
                n = 1;
                block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
@@ -990,11 +991,32 @@ static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
                if (no_addr == skip)
                        continue;
                *last_unlinked = no_addr;
-                return no_addr;
+                error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl);
+                if (error)
+                        continue;
+                /* If the inode is already in cache, we can ignore it here
+                 * because the existing inode disposal code will deal with
+                 * it when all refs have gone away. Accessing gl_object like
+                 * this is not safe in general. Here it is ok because we do
+                 * not dereference the pointer, and we only need an approx
+                 * answer to whether it is NULL or not.
+                 */
+                ip = gl->gl_object;
+                if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+                        gfs2_glock_put(gl);
+                else
+                        found++;
+                /* Limit reclaim to sensible number of tasks */
+                if (found > 2*NR_CPUS)
+                        return;
        }
        rgd->rd_flags &= ~GFS2_RDF_CHECK;
-        return 0;
+        return;
 }
 /**
@@ -1075,11 +1097,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
 * Try to acquire rgrp in way which avoids contending with others.
 *
 * Returns: errno
- *          unlinked: the block address of an unlinked block to be reclaimed
 */
-static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
+static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
-                          u64 *last_unlinked)
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *rgd, *begin = NULL;
@@ -1089,7 +1109,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
        int loops = 0;
        int error, rg_locked;
-        *unlinked = 0;
        rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
        while (rgd) {
@@ -1106,17 +1125,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
                case 0:
                        if (try_rgrp_fit(rgd, al))
                                goto out;
-                        /* If the rg came in already locked, there's no
+                        if (rgd->rd_flags & GFS2_RDF_CHECK)
-                           way we can recover from a failed try_rgrp_unlink
+                                try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
-                           because that would require an iput which can only
-                           happen after the rgrp is unlocked. */
-                        if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
-                                *unlinked = try_rgrp_unlink(rgd, last_unlinked,
-                                                           ip->i_no_addr);
                        if (!rg_locked)
                                gfs2_glock_dq_uninit(&al->al_rgd_gh);
-                        if (*unlinked)
-                                return -EAGAIN;
                        /* fall through */
                case GLR_TRYFAILED:
                        rgd = recent_rgrp_next(rgd);
@@ -1145,13 +1157,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
                case 0:
                        if (try_rgrp_fit(rgd, al))
                                goto out;
-                        if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
+                        if (rgd->rd_flags & GFS2_RDF_CHECK)
-                                *unlinked = try_rgrp_unlink(rgd, last_unlinked,
+                                try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
-                                                            ip->i_no_addr);
                        if (!rg_locked)
                                gfs2_glock_dq_uninit(&al->al_rgd_gh);
-                        if (*unlinked)
-                                return -EAGAIN;
                        break;
                case GLR_TRYFAILED:
@@ -1204,12 +1213,12 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_alloc *al = ip->i_alloc;
        int error = 0;
-        u64 last_unlinked = NO_BLOCK, unlinked;
+        u64 last_unlinked = NO_BLOCK;
+        int tries = 0;
        if (gfs2_assert_warn(sdp, al->al_requested))
                return -EINVAL;
-try_again:
        if (hold_rindex) {
                /* We need to hold the rindex unless the inode we're using is
                   the rindex itself, in which case it's already held. */
@@ -1218,31 +1227,23 @@ try_again:
                else if (!sdp->sd_rgrps) /* We may not have the rindex read
                                            in, so: */
                        error = gfs2_ri_update_special(ip);
+                if (error)
+                        return error;
        }
-        if (error)
+        do {
-                return error;
+                error = get_local_rgrp(ip, &last_unlinked);
+                /* If there is no space, flushing the log may release some */
+                if (error)
+                        gfs2_log_flush(sdp, NULL);
+        } while (error && tries++ < 3);
-        /* Find an rgrp suitable for allocation.  If it encounters any unlinked
-           dinodes along the way, error will equal -EAGAIN and unlinked will
-           contains it block address. We then need to look up that inode and
-           try to free it, and try the allocation again. */
-        error = get_local_rgrp(ip, &unlinked, &last_unlinked);
        if (error) {
                if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
                        gfs2_glock_dq_uninit(&al->al_ri_gh);
-                if (error != -EAGAIN)
+                return error;
-                        return error;
-                gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked);
-                /* regardless of whether or not gfs2_process_unlinked_inode
-                   was successful, we don't want to repeat it again. */
-                last_unlinked = unlinked;
-                gfs2_log_flush(sdp, NULL);
-                error = 0;
-                goto try_again;
        }
        /* no error, so we have the rgrp set in the inode's allocation. */
        al->al_file = file;
        al->al_line = line;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d6cfac1f0a40..a5fe68189eed 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -932,8 +932,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
        if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
                *user = current_user();
                if (user_shm_lock(size, *user)) {
-                        WARN_ONCE(1,
+                        printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n");
-                          "Using mlock ulimits for SHM_HUGETLB deprecated\n");
                } else {
                        *user = NULL;
                        return ERR_PTR(-EPERM);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index e92fdbb3bc3a..d6cc16476620 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -6,7 +6,6 @@
 #include <linux/syscalls.h>
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <linux/capability.h>
 #include <linux/file.h>
 #include <linux/fs.h>
@@ -530,41 +529,6 @@ static int ioctl_fsthaw(struct file *filp)
        return thaw_super(sb);
 }
-static int ioctl_fstrim(struct file *filp, void __user *argp)
-{
-        struct super_block *sb = filp->f_path.dentry->d_inode->i_sb;
-        struct fstrim_range range;
-        int ret = 0;
-        if (!capable(CAP_SYS_ADMIN))
-                return -EPERM;
-        /* If filesystem doesn't support trim feature, return. */
-        if (sb->s_op->trim_fs == NULL)
-                return -EOPNOTSUPP;
-        /* If a blockdevice-backed filesystem isn't specified, return EINVAL. */
-        if (sb->s_bdev == NULL)
-                return -EINVAL;
-        if (argp == NULL) {
-                range.start = 0;
-                range.len = ULLONG_MAX;
-                range.minlen = 0;
-        } else if (copy_from_user(&range, argp, sizeof(range)))
-                return -EFAULT;
-        ret = sb->s_op->trim_fs(sb, &range);
-        if (ret < 0)
-                return ret;
-        if ((argp != NULL) &&
-            (copy_to_user(argp, &range, sizeof(range))))
-                return -EFAULT;
-        return 0;
-}
 /*
 * When you add any new common ioctls to the switches above and below
 * please update compat_sys_ioctl() too.
@@ -615,10 +579,6 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
                error = ioctl_fsthaw(filp);
                break;
-        case FITRIM:
-                error = ioctl_fstrim(filp, argp);
-                break;
        case FS_IOC_FIEMAP:
                return ioctl_fiemap(filp, arg);
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 748cfb92dcc6..2f7d05c89922 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -111,12 +111,14 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
        read_lock(&tasklist_lock);
        switch (which) {
                case IOPRIO_WHO_PROCESS:
+                        rcu_read_lock();
                        if (!who)
                                p = current;
                        else
                                p = find_task_by_vpid(who);
                        if (p)
                                ret = set_task_ioprio(p, ioprio);
+                        rcu_read_unlock();
                        break;
                case IOPRIO_WHO_PGRP:
                        if (!who)
@@ -139,7 +141,12 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
                                break;
                        do_each_thread(g, p) {
-                                if (__task_cred(p)->uid != who)
+                                int match;
+                                rcu_read_lock();
+                                match = __task_cred(p)->uid == who;
+                                rcu_read_unlock();
+                                if (!match)
                                        continue;
                                ret = set_task_ioprio(p, ioprio);
                                if (ret)
@@ -200,12 +207,14 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
        read_lock(&tasklist_lock);
        switch (which) {
                case IOPRIO_WHO_PROCESS:
+                        rcu_read_lock();
                        if (!who)
                                p = current;
                        else
                                p = find_task_by_vpid(who);
                        if (p)
                                ret = get_task_ioprio(p);
+                        rcu_read_unlock();
                        break;
                case IOPRIO_WHO_PGRP:
                        if (!who)
@@ -232,7 +241,12 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
                                break;
                        do_each_thread(g, p) {
-                                if (__task_cred(p)->uid != user->uid)
+                                int match;
+                                rcu_read_lock();
+                                match = __task_cred(p)->uid == user->uid;
+                                rcu_read_unlock();
+                                if (!match)
                                        continue;
                                tmpio = get_task_ioprio(p);
                                if (tmpio < 0)
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c590d155c095..f837ba953529 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -899,6 +899,14 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
        /* journal descriptor can store up to n blocks -bzzz */
        journal->j_blocksize = blocksize;
+        journal->j_dev = bdev;
+        journal->j_fs_dev = fs_dev;
+        journal->j_blk_offset = start;
+        journal->j_maxlen = len;
+        bdevname(journal->j_dev, journal->j_devname);
+        p = journal->j_devname;
+        while ((p = strchr(p, '/')))
+                *p = '!';
        jbd2_stats_proc_init(journal);
        n = journal->j_blocksize / sizeof(journal_block_tag_t);
        journal->j_wbufsize = n;
@@ -908,14 +916,6 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
                        __func__);
                goto out_err;
        }
-        journal->j_dev = bdev;
-        journal->j_fs_dev = fs_dev;
-        journal->j_blk_offset = start;
-        journal->j_maxlen = len;
-        bdevname(journal->j_dev, journal->j_devname);
-        p = journal->j_devname;
-        while ((p = strchr(p, '/')))
-                *p = '!';
        bh = __getblk(journal->j_dev, start, journal->j_blocksize);
        if (!bh) {
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index d5bb86866e6c..25509eb28fd7 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -14,7 +14,6 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
-#include <linux/smp_lock.h>
 #include <linux/kthread.h>
 #define NLMDBG_FACILITY         NLMDBG_CLIENT
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 47ea1e1925b8..332c54cf75e0 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -7,7 +7,6 @@
 */
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/errno.h>
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 25e21e4023b2..ed0c59fe23ce 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -124,7 +124,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
                        continue;
                if (host->h_server != ni->server)
                        continue;
-                if (ni->server &&
+                if (ni->server && ni->src_len != 0 &&
                    !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
                        continue;
@@ -167,6 +167,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
        host->h_addrlen = ni->salen;
        rpc_set_port(nlm_addr(host), 0);
        memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
+        host->h_srcaddrlen = ni->src_len;
        host->h_version    = ni->version;
        host->h_proto      = ni->protocol;
        host->h_rpcclnt    = NULL;
@@ -238,9 +239,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
                                     const char *hostname,
                                     int noresvport)
 {
-        const struct sockaddr source = {
-                .sa_family      = AF_UNSPEC,
-        };
        struct nlm_lookup_host_info ni = {
                .server         = 0,
                .sap            = sap,
@@ -249,8 +247,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
                .version        = version,
                .hostname       = hostname,
                .hostname_len   = strlen(hostname),
-                .src_sap        = &source,
-                .src_len        = sizeof(source),
                .noresvport     = noresvport,
        };
@@ -357,7 +353,6 @@ nlm_bind_host(struct nlm_host *host)
                        .protocol       = host->h_proto,
                        .address        = nlm_addr(host),
                        .addrsize       = host->h_addrlen,
-                        .saddress       = nlm_srcaddr(host),
                        .timeout        = &timeparms,
                        .servername     = host->h_name,
                        .program        = &nlm_program,
@@ -376,6 +371,8 @@ nlm_bind_host(struct nlm_host *host)
                        args.flags |= RPC_CLNT_CREATE_HARDRTRY;
                if (host->h_noresvport)
                        args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+                if (host->h_srcaddrlen)
+                        args.saddress = nlm_srcaddr(host);
                clnt = rpc_create(&args);
                if (!IS_ERR(clnt))
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index a336e832475d..38d261192453 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -9,7 +9,6 @@
 #include <linux/types.h>
 #include <linux/time.h>
-#include <linux/smp_lock.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c462d346acbd..ef5659b211e9 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -25,7 +25,6 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/nlm.h>
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index c3069f38d602..0caea5310ac3 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -9,7 +9,6 @@
 #include <linux/types.h>
 #include <linux/time.h>
-#include <linux/smp_lock.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
diff --git a/fs/locks.c b/fs/locks.c
index 65765cb6afed..8729347bcd1a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -122,7 +122,6 @@
 #include <linux/module.h>
 #include <linux/security.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/syscalls.h>
 #include <linux/time.h>
 #include <linux/rcupdate.h>
@@ -1504,9 +1503,8 @@ static int do_fcntl_delete_lease(struct file *filp)
 static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
 {
-        struct file_lock *fl;
+        struct file_lock *fl, *ret;
        struct fasync_struct *new;
-        struct inode *inode = filp->f_path.dentry->d_inode;
        int error;
        fl = lease_alloc(filp, arg);
@@ -1518,13 +1516,16 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
                locks_free_lock(fl);
                return -ENOMEM;
        }
+        ret = fl;
        lock_flocks();
-        error = __vfs_setlease(filp, arg, &fl);
+        error = __vfs_setlease(filp, arg, &ret);
        if (error) {
                unlock_flocks();
                locks_free_lock(fl);
                goto out_free_fasync;
        }
+        if (ret != fl)
+                locks_free_lock(fl);
        /*
         * fasync_insert_entry() returns the old entry if any.
@@ -1532,17 +1533,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
         * inserted it into the fasync list. Clear new so that
         * we don't release it here.
         */
-        if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new))
+        if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new))
                new = NULL;
-        if (error < 0) {
+        error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-                /* remove lease just inserted by setlease */
-                fl->fl_type = F_UNLCK | F_INPROGRESS;
-                fl->fl_break_time = jiffies - 10;
-                time_out_leases(inode);
-        } else {
-                error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-        }
        unlock_flocks();
 out_free_fasync:
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index cd51a36b37f0..57afd4a6fabb 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -486,7 +486,7 @@ static inline int logfs_get_sb_bdev(struct logfs_super *s,
 /* dev_mtd.c */
 #ifdef CONFIG_MTD
-int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
+int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr);
 #else
 static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
 {
diff --git a/fs/namespace.c b/fs/namespace.c
index 8a415c9c5e55..3dbfc072ec70 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -13,7 +13,6 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
-#include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/acct.h>
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index aac8832e919e..f22b12e7d337 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -19,7 +19,6 @@
 #include <linux/mm.h>
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
-#include <linux/smp_lock.h>
 #include <linux/ncp_fs.h>
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 6c754f70c529..cb50aaf981df 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -17,7 +17,6 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/ncp_fs.h>
 #include "ncplib_kernel.h"
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index d290545aa0c4..8fb93b604e73 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -26,7 +26,6 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
-#include <linux/smp_lock.h>
 #include <linux/vfs.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index c2a1f9a155c3..d40a547e3377 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -17,7 +17,6 @@
 #include <linux/mount.h>
 #include <linux/slab.h>
 #include <linux/highuid.h>
-#include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index aeec017fe814..93a8b3bd69e3 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -9,7 +9,6 @@
 #include <linux/completion.h>
 #include <linux/ip.h>
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/nfs_fs.h>
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 232a7eead33a..1fd62fc49be3 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -11,7 +11,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 #include <linux/nfs4.h>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 07ac3847e562..662df2a5fad5 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
 #include <linux/mount.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
+#include <linux/kmemleak.h>
 #include "delegation.h"
 #include "iostat.h"
@@ -194,9 +195,13 @@ typedef struct {
 static
 struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
 {
+        void *ptr;
        if (page == NULL)
                return ERR_PTR(-EIO);
-        return (struct nfs_cache_array *)kmap(page);
+        ptr = kmap(page);
+        if (ptr == NULL)
+                return ERR_PTR(-ENOMEM);
+        return ptr;
 }
 static
@@ -213,6 +218,9 @@ int nfs_readdir_clear_array(struct page *page, gfp_t mask)
 {
        struct nfs_cache_array *array = nfs_readdir_get_array(page);
        int i;
+        if (IS_ERR(array))
+                return PTR_ERR(array);
        for (i = 0; i < array->size; i++)
                kfree(array->array[i].string.name);
        nfs_readdir_release_array(page);
@@ -231,6 +239,11 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le
        string->name = kmemdup(name, len, GFP_KERNEL);
        if (string->name == NULL)
                return -ENOMEM;
+        /*
+         * Avoid a kmemleak false positive. The pointer to the name is stored
+         * in a page cache page which kmemleak does not scan.
+         */
+        kmemleak_not_leak(string->name);
        string->hash = full_name_hash(name, len);
        return 0;
 }
@@ -244,7 +257,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
        if (IS_ERR(array))
                return PTR_ERR(array);
-        ret = -EIO;
+        ret = -ENOSPC;
        if (array->size >= MAX_READDIR_ARRAY)
                goto out;
@@ -255,9 +268,9 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
        if (ret)
                goto out;
        array->last_cookie = entry->cookie;
+        array->size++;
        if (entry->eof == 1)
                array->eof_index = array->size;
-        array->size++;
 out:
        nfs_readdir_release_array(page);
        return ret;
@@ -272,7 +285,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
        if (diff < 0)
                goto out_eof;
        if (diff >= array->size) {
-                if (array->eof_index > 0)
+                if (array->eof_index >= 0)
                        goto out_eof;
                desc->current_index += array->size;
                return -EAGAIN;
@@ -281,8 +294,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
        index = (unsigned int)diff;
        *desc->dir_cookie = array->array[index].cookie;
        desc->cache_entry_index = index;
-        if (index == array->eof_index)
-                desc->eof = 1;
        return 0;
 out_eof:
        desc->eof = 1;
@@ -296,17 +307,17 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
        int status = -EAGAIN;
        for (i = 0; i < array->size; i++) {
-                if (i == array->eof_index) {
-                        desc->eof = 1;
-                        status = -EBADCOOKIE;
-                }
                if (array->array[i].cookie == *desc->dir_cookie) {
                        desc->cache_entry_index = i;
                        status = 0;
-                        break;
+                        goto out;
                }
        }
+        if (i == array->eof_index) {
+                desc->eof = 1;
+                status = -EBADCOOKIE;
+        }
+out:
        return status;
 }
@@ -449,7 +460,7 @@ out:
 /* Perform conversion from xdr to cache array */
 static
-void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
+int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
                                void *xdr_page, struct page *page, unsigned int buflen)
 {
        struct xdr_stream stream;
@@ -471,21 +482,29 @@ void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *e
        do {
                status = xdr_decode(desc, entry, &stream);
-                if (status != 0)
+                if (status != 0) {
+                        if (status == -EAGAIN)
+                                status = 0;
                        break;
+                }
-                if (nfs_readdir_add_to_array(entry, page) == -1)
-                        break;
                if (desc->plus == 1)
                        nfs_prime_dcache(desc->file->f_path.dentry, entry);
+                status = nfs_readdir_add_to_array(entry, page);
+                if (status != 0)
+                        break;
        } while (!entry->eof);
        if (status == -EBADCOOKIE && entry->eof) {
                array = nfs_readdir_get_array(page);
-                array->eof_index = array->size - 1;
+                if (!IS_ERR(array)) {
-                status = 0;
+                        array->eof_index = array->size;
-                nfs_readdir_release_array(page);
+                        status = 0;
+                        nfs_readdir_release_array(page);
+                }
        }
+        return status;
 }
 static
@@ -537,7 +556,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
        struct nfs_entry entry;
        struct file     *file = desc->file;
        struct nfs_cache_array *array;
-        int status = 0;
+        int status = -ENOMEM;
        unsigned int array_size = ARRAY_SIZE(pages);
        entry.prev_cookie = 0;
@@ -549,6 +568,10 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
                goto out;
        array = nfs_readdir_get_array(page);
+        if (IS_ERR(array)) {
+                status = PTR_ERR(array);
+                goto out;
+        }
        memset(array, 0, sizeof(struct nfs_cache_array));
        array->eof_index = -1;
@@ -556,12 +579,19 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
        if (!pages_ptr)
                goto out_release_array;
        do {
+                unsigned int pglen;
                status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
                if (status < 0)
                        break;
-                nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE);
+                pglen = status;
-        } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY);
+                status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
+                if (status < 0) {
+                        if (status == -ENOSPC)
+                                status = 0;
+                        break;
+                }
+        } while (array->eof_index < 0);
        nfs_readdir_free_large_page(pages_ptr, pages, array_size);
 out_release_array:
@@ -582,8 +612,10 @@ static
 int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
 {
        struct inode    *inode = desc->file->f_path.dentry->d_inode;
+        int ret;
-        if (nfs_readdir_xdr_to_array(desc, page, inode) < 0)
+        ret = nfs_readdir_xdr_to_array(desc, page, inode);
+        if (ret < 0)
                goto error;
        SetPageUptodate(page);
@@ -595,7 +627,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
        return 0;
 error:
        unlock_page(page);
-        return -EIO;
+        return ret;
 }
 static
@@ -608,12 +640,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
 static
 struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
 {
-        struct page *page;
+        return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
-        page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
                        desc->page_index, (filler_t *)nfs_readdir_filler, desc);
-        if (IS_ERR(page))
-                desc->eof = 1;
-        return page;
 }
 /*
@@ -639,8 +667,10 @@ int find_cache_page(nfs_readdir_descriptor_t *desc)
 static inline
 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 {
-        int res = -EAGAIN;
+        int res;
+        if (desc->page_index == 0)
+                desc->current_index = 0;
        while (1) {
                res = find_cache_page(desc);
                if (res != -EAGAIN)
@@ -670,6 +700,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
        struct dentry *dentry = NULL;
        array = nfs_readdir_get_array(desc->page);
+        if (IS_ERR(array))
+                return PTR_ERR(array);
        for (i = desc->cache_entry_index; i < array->size; i++) {
                d_type = DT_UNKNOWN;
@@ -685,11 +717,9 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
                        *desc->dir_cookie = array->array[i+1].cookie;
                else
                        *desc->dir_cookie = array->last_cookie;
-                if (i == array->eof_index) {
-                        desc->eof = 1;
-                        break;
-                }
        }
+        if (i == array->eof_index)
+                desc->eof = 1;
        nfs_readdir_release_array(desc->page);
        cache_page_release(desc);
@@ -1345,12 +1375,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
                                res = NULL;
                                goto out;
                        /* This turned out not to be a regular file */
-                        case -EISDIR:
                        case -ENOTDIR:
                                goto no_open;
                        case -ELOOP:
                                if (!(nd->intent.open.flags & O_NOFOLLOW))
                                        goto no_open;
+                        /* case -EISDIR: */
                        /* case -EINVAL: */
                        default:
                                res = ERR_CAST(inode);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index e6bf45710cc7..2563f765c9b4 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -423,7 +423,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
        struct page **page;
        size_t hdrlen;
        unsigned int pglen, recvd;
-        int status, nr = 0;
+        int status;
        if ((status = ntohl(*p++)))
                return nfs_stat_to_errno(status);
@@ -443,7 +443,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
        if (pglen > recvd)
                pglen = recvd;
        page = rcvbuf->pages;
-        return nr;
+        return pglen;
 }
 static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index d9a5e832c257..748dc91a4a14 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -555,7 +555,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
        struct page **page;
        size_t hdrlen;
        u32 recvd, pglen;
-        int status, nr = 0;
+        int status;
        status = ntohl(*p++);
        /* Decode post_op_attrs */
@@ -586,7 +586,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
                pglen = recvd;
        page = rcvbuf->pages;
-        return nr;
+        return pglen;
 }
 __be32 *
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0f24cdf2cb13..6a653ffd8e4e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2852,8 +2852,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
        nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
        res.pgbase = args.pgbase;
        status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0);
-        if (status == 0)
+        if (status >= 0) {
                memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
+                status += args.pgbase;
+        }
        nfs_invalidate_atime(dir);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f313c4cce7e4..b7a204ff6fe1 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4518,7 +4518,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
        xdr_read_pages(xdr, pglen);
-        return 0;
+        return pglen;
 }
 static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0a42e8f4adcb..3c045044fca2 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -39,7 +39,6 @@
 #include <linux/nfs_mount.h>
 #include <linux/nfs4_mount.h>
 #include <linux/lockd/bind.h>
-#include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
 #include <linux/mnt_namespace.h>
@@ -67,6 +66,12 @@
 #define NFSDBG_FACILITY         NFSDBG_VFS
+#ifdef CONFIG_NFS_V3
+#define NFS_DEFAULT_VERSION 3
+#else
+#define NFS_DEFAULT_VERSION 2
+#endif
 enum {
        /* Mount options that take no arguments */
        Opt_soft, Opt_hard,
@@ -2277,7 +2282,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
        };
        int error = -ENOMEM;
-        data = nfs_alloc_parsed_mount_data(3);
+        data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
        mntfh = nfs_alloc_fhandle();
        if (data == NULL || mntfh == NULL)
                goto out_free_fh;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f1e5ec6b5105..116cab970e0f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -673,16 +673,17 @@ static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
        spin_unlock(&clp->cl_lock);
 }
-static void nfsd4_register_conn(struct nfsd4_conn *conn)
+static int nfsd4_register_conn(struct nfsd4_conn *conn)
 {
        conn->cn_xpt_user.callback = nfsd4_conn_lost;
-        register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
+        return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
 }
 static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
 {
        struct nfsd4_conn *conn;
        u32 flags = NFS4_CDFC4_FORE;
+        int ret;
        if (ses->se_flags & SESSION4_BACK_CHAN)
                flags |= NFS4_CDFC4_BACK;
@@ -690,7 +691,10 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
        if (!conn)
                return nfserr_jukebox;
        nfsd4_hash_conn(conn, ses);
-        nfsd4_register_conn(conn);
+        ret = nfsd4_register_conn(conn);
+        if (ret)
+                /* oops; xprt is already down: */
+                nfsd4_conn_lost(&conn->cn_xpt_user);
        return nfs_ok;
 }
@@ -1644,6 +1648,7 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
 {
        struct nfs4_client *clp = ses->se_client;
        struct nfsd4_conn *c;
+        int ret;
        spin_lock(&clp->cl_lock);
        c = __nfsd4_find_conn(new->cn_xprt, ses);
@@ -1654,7 +1659,10 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
        }
        __nfsd4_hash_conn(new, ses);
        spin_unlock(&clp->cl_lock);
-        nfsd4_register_conn(new);
+        ret = nfsd4_register_conn(new);
+        if (ret)
+                /* oops; xprt is already down: */
+                nfsd4_conn_lost(&new->cn_xpt_user);
        return;
 }
@@ -2254,7 +2262,7 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
 * Spawn a thread to perform a recall on the delegation represented
 * by the lease (file_lock)
 *
- * Called from break_lease() with lock_kernel() held.
+ * Called from break_lease() with lock_flocks() held.
 * Note: we assume break_lease will only call this *once* for any given
 * lease.
 */
@@ -2278,7 +2286,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
        list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
        spin_unlock(&recall_lock);
-        /* only place dl_time is set. protected by lock_kernel*/
+        /* only place dl_time is set. protected by lock_flocks*/
        dp->dl_time = get_seconds();
        /*
@@ -2295,7 +2303,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
 /*
 * The file_lock is being reapd.
 *
- * Called by locks_free_lock() with lock_kernel() held.
+ * Called by locks_free_lock() with lock_flocks() held.
 */
 static
 void nfsd_release_deleg_cb(struct file_lock *fl)
@@ -2310,7 +2318,7 @@ void nfsd_release_deleg_cb(struct file_lock *fl)
 }
 /*
- * Called from setlease() with lock_kernel() held
+ * Called from setlease() with lock_flocks() held
 */
 static
 int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d8408217e3bd..1efea3615589 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -159,7 +159,9 @@ struct ocfs2_lock_res {
        char                     l_name[OCFS2_LOCK_ID_MAX_LEN];
        unsigned int             l_ro_holders;
        unsigned int             l_ex_holders;
-        unsigned char            l_level;
+        char                     l_level;
+        char                     l_requested;
+        char                     l_blocking;
        /* Data packed - type enum ocfs2_lock_type */
        unsigned char            l_type;
@@ -169,8 +171,6 @@ struct ocfs2_lock_res {
        unsigned char            l_action;
        /* Data packed - enum type ocfs2_unlock_action */
        unsigned char            l_unlock_action;
-        unsigned char            l_requested;
-        unsigned char            l_blocking;
        unsigned int             l_pending_gen;
        spinlock_t               l_lock;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index f02c0ef31578..cfeab7ce3697 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -41,7 +41,6 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/quotaops.h>
-#include <linux/smp_lock.h>
 #define MLOG_MASK_PREFIX ML_SUPER
 #include <cluster/masklog.h>
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index ddb1f41376e5..911e61f348fc 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -418,7 +418,7 @@ out_no_root:
 static struct dentry *openprom_mount(struct file_system_type *fs_type,
        int flags, const char *dev_name, void *data)
 {
-        return mount_single(fs_type, flags, data, openprom_fill_super)
+        return mount_single(fs_type, flags, data, openprom_fill_super);
 }
 static struct file_system_type openprom_fs_type = {
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 9c2b5f484879..3ddb6068177c 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -16,7 +16,6 @@
 #include <linux/limits.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <linux/sysctl.h>
 #include <linux/slab.h>
diff --git a/fs/read_write.c b/fs/read_write.c
index 431a0ed610c8..5d431bacbea9 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
 #include <linux/fcntl.h>
 #include <linux/file.h>
 #include <linux/uio.h>
-#include <linux/smp_lock.h>
 #include <linux/fsnotify.h>
 #include <linux/security.h>
 #include <linux/module.h>
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 41656d40dc5c..0bae036831e2 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -8,7 +8,6 @@
 #include <linux/reiserfs_acl.h>
 #include <linux/reiserfs_xattr.h>
 #include <linux/exportfs.h>
-#include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index adf22b485cea..bd9763e76bae 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -9,7 +9,6 @@
 #include <linux/time.h>
 #include <asm/uaccess.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
 #include <linux/compat.h>
 /*
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 076c8b194682..d31bce1a9f90 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -43,7 +43,6 @@
 #include <linux/fcntl.h>
 #include <linux/stat.h>
 #include <linux/string.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/workqueue.h>
 #include <linux/writeback.h>
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 3bf7a6457f4d..b243117b8752 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,7 +28,6 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/crc32.h>
-#include <linux/smp_lock.h>
 struct file_system_type reiserfs_fs_type;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c9af48fffcd7..7d287afccde5 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1111,11 +1111,12 @@ xfs_vm_writepage(
                        uptodate = 0;
                /*
-                 * A hole may still be marked uptodate because discard_buffer
+                 * set_page_dirty dirties all buffers in a page, independent
-                 * leaves the flag set.
+                 * of their state.  The dirty state however is entirely
+                 * meaningless for holes (!mapped && uptodate), so skip
+                 * buffers covering holes here.
                 */
                if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-                        ASSERT(!buffer_dirty(bh));
                        imap_valid = 0;
                        continue;
                }
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 63fd2c07cb57..aa1d353def29 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1781,7 +1781,6 @@ xfs_buf_delwri_split(
        INIT_LIST_HEAD(list);
        spin_lock(dwlk);
        list_for_each_entry_safe(bp, n, dwq, b_list) {
-                trace_xfs_buf_delwri_split(bp, _RET_IP_);
                ASSERT(bp->b_flags & XBF_DELWRI);
                if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1795,6 +1794,7 @@ xfs_buf_delwri_split(
                                         _XBF_RUN_QUEUES);
                        bp->b_flags |= XBF_WRITE;
                        list_move_tail(&bp->b_list, list);
+                        trace_xfs_buf_delwri_split(bp, _RET_IP_);
                } else
                        skipped++;
        }
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 2ea238f6d38e..ad442d9e392e 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -416,7 +416,7 @@ xfs_attrlist_by_handle(
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
-        kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+        kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
        if (!kbuf)
                goto out_dput;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 96107efc0c61..94d5fd6a2973 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -762,7 +762,8 @@ xfs_setup_inode(
        inode->i_state = I_NEW;
        inode_sb_list_add(inode);
-        insert_inode_hash(inode);
+        /* make the inode look hashed for the writeback code */
+        hlist_add_fake(&inode->i_hash);
        inode->i_mode   = ip->i_d.di_mode;
        inode->i_nlink  = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 9f3a78fe6ae4..064f964d4f3c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -353,9 +353,6 @@ xfs_parseargs(
                        mp->m_qflags &= ~XFS_OQUOTA_ENFD;
                } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
                        mp->m_flags |= XFS_MOUNT_DELAYLOG;
-                        cmn_err(CE_WARN,
-                                "Enabling EXPERIMENTAL delayed logging feature "
-                                "- use at your own risk.\n");
                } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
                        mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
                } else if (!strcmp(this_char, "ihashsize")) {
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 37d33254981d..afb0d7cfad1c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -853,6 +853,7 @@ restart:
                if (trylock) {
                        if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
                                skipped++;
+                                xfs_perag_put(pag);
                                continue;
                        }
                        first_index = pag->pag_ici_reclaim_cursor;
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9b715dce5699..9124425b7f2f 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -744,9 +744,15 @@ xfs_filestream_new_ag(
         * If the file's parent directory is known, take its iolock in exclusive
         * mode to prevent two sibling files from racing each other to migrate
         * themselves and their parent to different AGs.
+         *
+         * Note that we lock the parent directory iolock inside the child
+         * iolock here.  That's fine as we never hold both parent and child
+         * iolock in any other place.  This is different from the ilock,
+         * which requires locking of the child after the parent for namespace
+         * operations.
         */
        if (pip)
-                xfs_ilock(pip, XFS_IOLOCK_EXCL);
+                xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
        /*
         * A new AG needs to be found for the file.  If the file's parent
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b1498ab5a399..19e9dfa1c254 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -275,6 +275,7 @@ xfs_free_perag(
                pag = radix_tree_delete(&mp->m_perag_tree, agno);
                spin_unlock(&mp->m_perag_lock);
                ASSERT(pag);
+                ASSERT(atomic_read(&pag->pag_ref) == 0);
                call_rcu(&pag->rcu_head, __xfs_free_perag);
        }
 }
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index e0e64b113bd6..9bb6eda4cd21 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -346,8 +346,17 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
 #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
 #define xfs_trans_apply_dquot_deltas(tp)
 #define xfs_trans_unreserve_and_mod_dquots(tp)
-#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags)      (0)
+static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
-#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl)      (0)
+                struct xfs_inode *ip, long nblks, long ninos, uint flags)
+{
+        return 0;
+}
+static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
+                struct xfs_mount *mp, struct xfs_dquot *udqp,
+                struct xfs_dquot *gdqp, long nblks, long nions, uint flags)
+{
+        return 0;
+}
 #define xfs_qm_vop_create_dqattach(tp, ip, u, g)
 #define xfs_qm_vop_rename_dqattach(it)                                  (0)
 #define xfs_qm_vop_chown(tp, ip, old, new)                              (NULL)
@@ -357,11 +366,14 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
 #define xfs_qm_dqdetach(ip)
 #define xfs_qm_dqrele(d)
 #define xfs_qm_statvfs(ip, s)
-#define xfs_qm_sync(mp, fl)                                             (0)
+static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
+{
+        return 0;
+}
 #define xfs_qm_newmount(mp, a, b)                                       (0)
 #define xfs_qm_mount_quotas(mp)
 #define xfs_qm_unmount(mp)
-#define xfs_qm_unmount_quotas(mp)                                       (0)
+#define xfs_qm_unmount_quotas(mp)
 #endif /* CONFIG_XFS_QUOTA */
 #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
author	Takashi Iwai <tiwai@suse.de>	2010-11-29 01:44:01 -0500
committer	Takashi Iwai <tiwai@suse.de>	2010-11-29 01:44:01 -0500
commit	ca19e77e44985b5500f5461f7d2f4ce799cb60ce (patch)
tree	3ba3635ac2f212b332198b14cc3239195c153e67 /fs
parent	9d57883f08d3c0c111b50bf185dfee9731a12c76 (diff)
parent	ac70eb1305d5a81efd1e32327d7e79be15a63a5a (diff)