38 files changed, 821 insertions, 348 deletions
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index ba1c88af49fe..82011019494c 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -308,7 +308,7 @@ static struct adfs_discmap *adfs_read_map(struct super_block *sb, struct adfs_di
        if (adfs_checkmap(sb, dm))
                return dm;
-        adfs_error(sb, NULL, "map corrupted");
+        adfs_error(sb, "map corrupted");
 error_free:
        while (--zone >= 0)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 37534573960b..045f98854f14 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -884,6 +884,61 @@ void bd_set_size(struct block_device *bdev, loff_t size)
 }
 EXPORT_SYMBOL(bd_set_size);
+/*
+ * Drop one opener of @bdev and tear the device down when the last
+ * opener goes away.
+ *
+ * @bdev:     block device being released.
+ * @subclass: nesting subclass passed to mutex_lock_nested() for
+ *            @bdev->bd_mutex; the containing device is locked (and
+ *            recursively released) with subclass + 1.
+ *
+ * The body runs with bdev->bd_mutex and the big kernel lock held.
+ * Returns whatever the disk's ->release() method returned when @bdev is
+ * its own container and the method exists, otherwise 0.
+ */
+static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
+{
+        int ret = 0;
+        struct inode *bd_inode = bdev->bd_inode;
+        struct gendisk *disk = bdev->bd_disk;
+        mutex_lock_nested(&bdev->bd_mutex, subclass);
+        lock_kernel();
+        /* Last opener: write out and drop the device's cached data. */
+        if (!--bdev->bd_openers) {
+                sync_blockdev(bdev);
+                kill_bdev(bdev);
+        }
+        if (bdev->bd_contains == bdev) {
+                /* bdev is its own container: let the driver see the release. */
+                if (disk->fops->release)
+                        ret = disk->fops->release(bd_inode, NULL);
+        } else {
+                /*
+                 * bdev lives inside another device: only the container's
+                 * partition count is adjusted, under the container's own
+                 * mutex taken one nesting level deeper.
+                 */
+                mutex_lock_nested(&bdev->bd_contains->bd_mutex,
+                                  subclass + 1);
+                bdev->bd_contains->bd_part_count--;
+                mutex_unlock(&bdev->bd_contains->bd_mutex);
+        }
+        if (!bdev->bd_openers) {
+                /*
+                 * owner is read before put_disk() -- presumably because the
+                 * disk may not be safe to dereference afterwards; confirm.
+                 */
+                struct module *owner = disk->fops->owner;
+                put_disk(disk);
+                module_put(owner);
+                if (bdev->bd_contains != bdev) {
+                        kobject_put(&bdev->bd_part->kobj);
+                        bdev->bd_part = NULL;
+                }
+                bdev->bd_disk = NULL;
+                bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+                /* Release the container too, one nesting level deeper. */
+                if (bdev != bdev->bd_contains)
+                        __blkdev_put(bdev->bd_contains, subclass + 1);
+                bdev->bd_contains = NULL;
+        }
+        unlock_kernel();
+        mutex_unlock(&bdev->bd_mutex);
+        bdput(bdev);
+        return ret;
+}
+/*
+ * Release @bdev via __blkdev_put() using the BD_MUTEX_NORMAL locking
+ * subclass.  Returns __blkdev_put()'s result unchanged.
+ */
+int blkdev_put(struct block_device *bdev)
+{
+        return __blkdev_put(bdev, BD_MUTEX_NORMAL);
+}
+EXPORT_SYMBOL(blkdev_put);
+/*
+ * Same as blkdev_put(), but takes bd_mutex with the BD_MUTEX_PARTITION
+ * locking subclass.  Returns __blkdev_put()'s result unchanged.
+ */
+int blkdev_put_partition(struct block_device *bdev)
+{
+        return __blkdev_put(bdev, BD_MUTEX_PARTITION);
+}
+EXPORT_SYMBOL(blkdev_put_partition);
 static int
 blkdev_get_whole(struct block_device *bdev, mode_t mode, unsigned flags);
@@ -980,7 +1035,7 @@ out_first:
        bdev->bd_disk = NULL;
        bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
        if (bdev != bdev->bd_contains)
-                blkdev_put(bdev->bd_contains);
+                __blkdev_put(bdev->bd_contains, BD_MUTEX_WHOLE);
        bdev->bd_contains = NULL;
        put_disk(disk);
        module_put(owner);
@@ -1079,63 +1134,6 @@ static int blkdev_open(struct inode * inode, struct file * filp)
        return res;
 }
-static int __blkdev_put(struct block_device *bdev, unsigned int subclass)
-{
-        int ret = 0;
-        struct inode *bd_inode = bdev->bd_inode;
-        struct gendisk *disk = bdev->bd_disk;
-        mutex_lock_nested(&bdev->bd_mutex, subclass);
-        lock_kernel();
-        if (!--bdev->bd_openers) {
-                sync_blockdev(bdev);
-                kill_bdev(bdev);
-        }
-        if (bdev->bd_contains == bdev) {
-                if (disk->fops->release)
-                        ret = disk->fops->release(bd_inode, NULL);
-        } else {
-                mutex_lock_nested(&bdev->bd_contains->bd_mutex,
-                                  subclass + 1);
-                bdev->bd_contains->bd_part_count--;
-                mutex_unlock(&bdev->bd_contains->bd_mutex);
-        }
-        if (!bdev->bd_openers) {
-                struct module *owner = disk->fops->owner;
-                put_disk(disk);
-                module_put(owner);
-                if (bdev->bd_contains != bdev) {
-                        kobject_put(&bdev->bd_part->kobj);
-                        bdev->bd_part = NULL;
-                }
-                bdev->bd_disk = NULL;
-                bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
-                if (bdev != bdev->bd_contains)
-                        __blkdev_put(bdev->bd_contains, subclass + 1);
-                bdev->bd_contains = NULL;
-        }
-        unlock_kernel();
-        mutex_unlock(&bdev->bd_mutex);
-        bdput(bdev);
-        return ret;
-}
-int blkdev_put(struct block_device *bdev)
-{
-        return __blkdev_put(bdev, BD_MUTEX_NORMAL);
-}
-EXPORT_SYMBOL(blkdev_put);
-int blkdev_put_partition(struct block_device *bdev)
-{
-        return __blkdev_put(bdev, BD_MUTEX_PARTITION);
-}
-EXPORT_SYMBOL(blkdev_put_partition);
 static int blkdev_close(struct inode * inode, struct file * filp)
 {
        struct block_device *bdev = I_BDEV(filp->f_mapping->host);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 19ffb043abbc..3a3567433b92 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1168,7 +1168,7 @@ static int ep_unlink(struct eventpoll *ep, struct epitem *epi)
 eexit_1:
        DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_unlink(%p, %p) = %d\n",
-                     current, ep, epi->file, error));
+                     current, ep, epi->ffd.file, error));
        return error;
 }
@@ -1236,7 +1236,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
        struct eventpoll *ep = epi->ep;
        DNPRINTK(3, (KERN_INFO "[%p] eventpoll: poll_callback(%p) epi=%p ep=%p\n",
-                     current, epi->file, epi, ep));
+                     current, epi->ffd.file, epi, ep));
        write_lock_irqsave(&ep->lock, flags);
diff --git a/fs/exec.c b/fs/exec.c
index 8344ba73a2a6..54135df2a966 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -486,8 +486,6 @@ struct file *open_exec(const char *name)
                if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
                    S_ISREG(inode->i_mode)) {
                        int err = vfs_permission(&nd, MAY_EXEC);
-                        if (!err && !(inode->i_mode & 0111))
-                                err = -EACCES;
                        file = ERR_PTR(err);
                        if (!err) {
                                file = nameidata_to_filp(&nd, O_RDONLY);
@@ -753,7 +751,7 @@ no_thread_group:
                write_lock_irq(&tasklist_lock);
                spin_lock(&oldsighand->siglock);
-                spin_lock(&newsighand->siglock);
+                spin_lock_nested(&newsighand->siglock, SINGLE_DEPTH_NESTING);
                rcu_assign_pointer(current->sighand, newsighand);
                recalc_sigpending();
@@ -922,12 +920,6 @@ int prepare_binprm(struct linux_binprm *bprm)
        int retval;
        mode = inode->i_mode;
-        /*
-         * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
-         * generic_permission lets a non-executable through
-         */
-        if (!(mode & 0111))     /* with at least _one_ execute bit set */
-                return -EACCES;
        if (bprm->file->f_op == NULL)
                return -EACCES;
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f2702cda9779..681dea8f9532 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -775,7 +775,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
        if (EXT2_INODE_SIZE(sb) == 0)
                goto cantfind_ext2;
        sbi->s_inodes_per_block = sb->s_blocksize / EXT2_INODE_SIZE(sb);
-        if (sbi->s_inodes_per_block == 0)
+        if (sbi->s_inodes_per_block == 0 || sbi->s_inodes_per_group == 0)
                goto cantfind_ext2;
        sbi->s_itb_per_group = sbi->s_inodes_per_group /
                                        sbi->s_inodes_per_block;
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index a504a40d6d29..063d994bda0b 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1269,12 +1269,12 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
                goal = le32_to_cpu(es->s_first_data_block);
        group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
                        EXT3_BLOCKS_PER_GROUP(sb);
+        goal_group = group_no;
+retry_alloc:
        gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
        if (!gdp)
                goto io_error;
-        goal_group = group_no;
-retry:
        free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
        /*
         * if there is not enough free blocks to make a new resevation
@@ -1349,7 +1349,7 @@ retry:
        if (my_rsv) {
                my_rsv = NULL;
                group_no = goal_group;
-                goto retry;
+                goto retry_alloc;
        }
        /* No space left on the device */
        *errp = -ENOSPC;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 63614ed16336..5c4fcd1dbf59 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -395,14 +395,16 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
        struct fuse_readpages_data data;
        int err;
+        err = -EIO;
        if (is_bad_inode(inode))
-                return -EIO;
+                goto clean_pages_up;
        data.file = file;
        data.inode = inode;
        data.req = fuse_get_req(fc);
+        err = PTR_ERR(data.req);
        if (IS_ERR(data.req))
-                return PTR_ERR(data.req);
+                goto clean_pages_up;
        err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
        if (!err) {
@@ -412,6 +414,10 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
                        fuse_put_request(fc, data.req);
        }
        return err;
+clean_pages_up:
+        put_pages_list(pages);
+        return err;
 }
 static size_t fuse_send_write(struct fuse_req *req, struct file *file,
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 93aa5715f224..78b1deae3fa2 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -44,6 +44,9 @@ static int set_task_ioprio(struct task_struct *task, int ioprio)
        task->ioprio = ioprio;
        ioc = task->io_context;
+        /* see wmb() in current_io_context() */
+        smp_read_barrier_depends();
        if (ioc && ioc->set_ioprio)
                ioc->set_ioprio(ioc, ioprio);
@@ -111,9 +114,9 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
                                        continue;
                                ret = set_task_ioprio(p, ioprio);
                                if (ret)
-                                        break;
+                                        goto free_uid;
                        } while_each_thread(g, p);
+free_uid:
                        if (who)
                                free_uid(user);
                        break;
@@ -137,6 +140,29 @@ out:
        return ret;
 }
+/*
+ * Pick the better of two I/O priorities.
+ *
+ * A priority that fails ioprio_valid() loses to the other one.  Otherwise
+ * the classes are compared, with IOPRIO_CLASS_NONE treated as
+ * IOPRIO_CLASS_BE: the numerically lower class wins, and within the same
+ * class the numerically lower priority value wins.
+ */
+int ioprio_best(unsigned short aprio, unsigned short bprio)
+{
+        unsigned short class_a, class_b;
+        if (!ioprio_valid(aprio))
+                return bprio;
+        if (!ioprio_valid(bprio))
+                return aprio;
+        class_a = IOPRIO_PRIO_CLASS(aprio);
+        if (class_a == IOPRIO_CLASS_NONE)
+                class_a = IOPRIO_CLASS_BE;
+        class_b = IOPRIO_PRIO_CLASS(bprio);
+        if (class_b == IOPRIO_CLASS_NONE)
+                class_b = IOPRIO_CLASS_BE;
+        if (class_a != class_b)
+                return class_a > class_b ? bprio : aprio;
+        return min(aprio, bprio);
+}
 asmlinkage long sys_ioprio_get(int which, int who)
 {
        struct task_struct *g, *p;
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 0971814c38b8..42da60784311 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -261,7 +261,7 @@ void journal_commit_transaction(journal_t *journal)
                        struct buffer_head *bh = jh2bh(jh);
                        jbd_lock_bh_state(bh);
-                        kfree(jh->b_committed_data);
+                        jbd_slab_free(jh->b_committed_data, bh->b_size);
                        jh->b_committed_data = NULL;
                        jbd_unlock_bh_state(bh);
                }
@@ -745,14 +745,14 @@ restart_loop:
                 * Otherwise, we can just throw away the frozen data now.
                 */
                if (jh->b_committed_data) {
-                        kfree(jh->b_committed_data);
+                        jbd_slab_free(jh->b_committed_data, bh->b_size);
                        jh->b_committed_data = NULL;
                        if (jh->b_frozen_data) {
                                jh->b_committed_data = jh->b_frozen_data;
                                jh->b_frozen_data = NULL;
                        }
                } else if (jh->b_frozen_data) {
-                        kfree(jh->b_frozen_data);
+                        jbd_slab_free(jh->b_frozen_data, bh->b_size);
                        jh->b_frozen_data = NULL;
                }
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 8c9b28dff119..f66724ce443a 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -84,6 +84,7 @@ EXPORT_SYMBOL(journal_force_commit);
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
+static int journal_create_jbd_slab(size_t slab_size);
 /*
 * Helper function used to manage commit timeouts
@@ -328,10 +329,10 @@ repeat:
                char *tmp;
                jbd_unlock_bh_state(bh_in);
-                tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS);
+                tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
                jbd_lock_bh_state(bh_in);
                if (jh_in->b_frozen_data) {
-                        kfree(tmp);
+                        jbd_slab_free(tmp, bh_in->b_size);
                        goto repeat;
                }
@@ -1069,17 +1070,17 @@ static int load_superblock(journal_t *journal)
 int journal_load(journal_t *journal)
 {
        int err;
+        journal_superblock_t *sb;
        err = load_superblock(journal);
        if (err)
                return err;
+        sb = journal->j_superblock;
        /* If this is a V2 superblock, then we have to check the
         * features flags on it. */
        if (journal->j_format_version >= 2) {
-                journal_superblock_t *sb = journal->j_superblock;
                if ((sb->s_feature_ro_compat &
                     ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
                    (sb->s_feature_incompat &
@@ -1090,6 +1091,13 @@ int journal_load(journal_t *journal)
                }
        }
+        /*
+         * Create a slab for this blocksize
+         */
+        err = journal_create_jbd_slab(cpu_to_be32(sb->s_blocksize));
+        if (err)
+                return err;
        /* Let the recovery code check whether it needs to recover any
         * data from the journal. */
        if (journal_recover(journal))
@@ -1612,6 +1620,77 @@ void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
 }
 /*
+ * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
+ * and allocate frozen and commit buffers from these slabs.
+ *
+ * The reason for doing this is to avoid SLAB_DEBUG, since it could
+ * cause a bh to cross a page boundary.
+ */
+#define JBD_MAX_SLABS 5
+/*
+ * Map a buffer size to its slot in jbd_slab[]: 1k -> 0, 2k -> 1, 4k -> 2,
+ * 8k -> 4 (slot 3 is unused, hence the NULL name below).  The argument is
+ * parenthesized so that expressions with lower precedence than >> (for
+ * example "a | b" or "c ? x : y") still expand correctly.
+ */
+#define JBD_SLAB_INDEX(size)  ((size) >> 11)
+static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+static const char *jbd_slab_names[JBD_MAX_SLABS] = {
+        "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
+};
+/*
+ * Destroy every slab cache that was created in jbd_slab[] and reset all
+ * slots to NULL.
+ */
+static void journal_destroy_jbd_slabs(void)
+{
+        kmem_cache_t **slot;
+        for (slot = jbd_slab; slot < jbd_slab + JBD_MAX_SLABS; slot++) {
+                if (*slot != NULL)
+                        kmem_cache_destroy(*slot);
+                *slot = NULL;
+        }
+}
+/*
+ * Make sure a slab cache exists for buffers of @slab_size bytes.
+ *
+ * Returns 0 if the cache already existed or was created, -ENOMEM if
+ * kmem_cache_create() failed.  A size whose index falls outside
+ * jbd_slab[] is a BUG.
+ */
+static int journal_create_jbd_slab(size_t slab_size)
+{
+        int idx = JBD_SLAB_INDEX(slab_size);
+        kmem_cache_t *cache;
+        BUG_ON(idx >= JBD_MAX_SLABS);
+        /* Nothing to do when a cache for this size is already in place. */
+        if (jbd_slab[idx] != NULL)
+                return 0;
+        /*
+         * The alignment is forced to the object size so that a single
+         * allocation never crosses a page boundary.
+         */
+        cache = kmem_cache_create(jbd_slab_names[idx],
+                                slab_size, slab_size, 0, NULL, NULL);
+        jbd_slab[idx] = cache;
+        if (cache == NULL) {
+                printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
+                return -ENOMEM;
+        }
+        return 0;
+}
+/*
+ * Allocate one @size-byte buffer from the matching jbd slab cache.
+ *
+ * @size:  buffer size; selects the cache via JBD_SLAB_INDEX().
+ * @flags: allocation flags; __GFP_NOFAIL is always OR-ed in.
+ *
+ * It is a BUG to call this with a size whose index lies outside
+ * jbd_slab[] or for which no cache has been created yet.
+ */
+void * jbd_slab_alloc(size_t size, gfp_t flags)
+{
+        int idx;
+        idx = JBD_SLAB_INDEX(size);
+        /* Same range check as journal_create_jbd_slab(): never index past the table. */
+        BUG_ON(idx >= JBD_MAX_SLABS);
+        BUG_ON(jbd_slab[idx] == NULL);
+        return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
+}
+/*
+ * Return @ptr to the jbd slab cache that serves @size-byte buffers.
+ *
+ * @ptr:  buffer previously obtained from jbd_slab_alloc().
+ * @size: the size it was allocated with; selects the cache.
+ *
+ * It is a BUG to call this with a size whose index lies outside
+ * jbd_slab[] or for which no cache exists.
+ */
+void jbd_slab_free(void *ptr,  size_t size)
+{
+        int idx;
+        idx = JBD_SLAB_INDEX(size);
+        /* Same range check as journal_create_jbd_slab(): never index past the table. */
+        BUG_ON(idx >= JBD_MAX_SLABS);
+        BUG_ON(jbd_slab[idx] == NULL);
+        kmem_cache_free(jbd_slab[idx], ptr);
+}
+/*
 * Journal_head storage management
 */
 static kmem_cache_t *journal_head_cache;
@@ -1799,13 +1878,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
                                printk(KERN_WARNING "%s: freeing "
                                                "b_frozen_data\n",
                                                __FUNCTION__);
-                                kfree(jh->b_frozen_data);
+                                jbd_slab_free(jh->b_frozen_data, bh->b_size);
                        }
                        if (jh->b_committed_data) {
                                printk(KERN_WARNING "%s: freeing "
                                                "b_committed_data\n",
                                                __FUNCTION__);
-                                kfree(jh->b_committed_data);
+                                jbd_slab_free(jh->b_committed_data, bh->b_size);
                        }
                        bh->b_private = NULL;
                        jh->b_bh = NULL;        /* debug, really */
@@ -1961,6 +2040,7 @@ static void journal_destroy_caches(void)
        journal_destroy_revoke_caches();
        journal_destroy_journal_head_cache();
        journal_destroy_handle_cache();
+        journal_destroy_jbd_slabs();
 }
 static int __init journal_init(void)
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 508b2ea91f43..de2e4cbbf79a 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -666,8 +666,9 @@ repeat:
                        if (!frozen_buffer) {
                                JBUFFER_TRACE(jh, "allocate memory for buffer");
                                jbd_unlock_bh_state(bh);
-                                frozen_buffer = jbd_kmalloc(jh2bh(jh)->b_size,
+                                frozen_buffer =
-                                                            GFP_NOFS);
+                                        jbd_slab_alloc(jh2bh(jh)->b_size,
+                                                         GFP_NOFS);
                                if (!frozen_buffer) {
                                        printk(KERN_EMERG
                                               "%s: OOM for frozen_buffer\n",
@@ -879,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 repeat:
        if (!jh->b_committed_data) {
-                committed_data = jbd_kmalloc(jh2bh(jh)->b_size, GFP_NOFS);
+                committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS);
                if (!committed_data) {
                        printk(KERN_EMERG "%s: No memory for committed data\n",
                                __FUNCTION__);
@@ -906,7 +907,7 @@ repeat:
 out:
        journal_put_journal_head(jh);
        if (unlikely(committed_data))
-                kfree(committed_data);
+                jbd_slab_free(committed_data, bh->b_size);
        return err;
 }
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 43e3f566aad6..a223cf4faa9b 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -168,16 +168,15 @@ void jfs_dirty_inode(struct inode *inode)
        set_cflag(COMMIT_Dirty, inode);
 }
-static int
+int jfs_get_block(struct inode *ip, sector_t lblock,
-jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
+                  struct buffer_head *bh_result, int create)
-                        struct buffer_head *bh_result, int create)
 {
        s64 lblock64 = lblock;
        int rc = 0;
        xad_t xad;
        s64 xaddr;
        int xflag;
-        s32 xlen = max_blocks;
+        s32 xlen = bh_result->b_size >> ip->i_blkbits;
        /*
         * Take appropriate lock on inode
@@ -188,7 +187,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
                IREAD_LOCK(ip);
        if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
-            (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) &&
+            (!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) &&
            xaddr) {
                if (xflag & XAD_NOTRECORDED) {
                        if (!create)
@@ -255,13 +254,6 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
        return rc;
 }
-static int jfs_get_block(struct inode *ip, sector_t lblock,
-                         struct buffer_head *bh_result, int create)
-{
-        return jfs_get_blocks(ip, lblock, bh_result->b_size >> ip->i_blkbits,
-                        bh_result, create);
-}
 static int jfs_writepage(struct page *page, struct writeback_control *wbc)
 {
        return nobh_writepage(page, jfs_get_block, wbc);
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index b5c7da6190dc..1fc48df670c8 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -32,6 +32,7 @@ extern void jfs_truncate_nolock(struct inode *, loff_t);
 extern void jfs_free_zero_link(struct inode *);
 extern struct dentry *jfs_get_parent(struct dentry *dentry);
 extern void jfs_set_inode_flags(struct inode *);
+extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
 extern const struct address_space_operations jfs_aops;
 extern struct inode_operations jfs_dir_inode_operations;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4f6cfebc82db..143bcd1d5eaa 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -26,6 +26,7 @@
 #include <linux/moduleparam.h>
 #include <linux/kthread.h>
 #include <linux/posix_acl.h>
+#include <linux/buffer_head.h>
 #include <asm/uaccess.h>
 #include <linux/seq_file.h>
@@ -298,7 +299,7 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize,
                        break;
                }
-#if defined(CONFIG_QUOTA)
+#ifdef CONFIG_QUOTA
                case Opt_quota:
                case Opt_usrquota:
                        *flag |= JFS_USRQUOTA;
@@ -597,7 +598,7 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
        if (sbi->flag & JFS_NOINTEGRITY)
                seq_puts(seq, ",nointegrity");
-#if defined(CONFIG_QUOTA)
+#ifdef CONFIG_QUOTA
        if (sbi->flag & JFS_USRQUOTA)
                seq_puts(seq, ",usrquota");
@@ -608,6 +609,113 @@ static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
        return 0;
 }
+#ifdef CONFIG_QUOTA
+/*
+ * Read @len bytes at offset @off from the quota file of the given @type
+ * into @data.
+ *
+ * The page cache is deliberately bypassed because we cannot afford to
+ * acquire its locks here.  Quota files are never truncated, the quota code
+ * serializes its own operations, and no one else should touch the files,
+ * so races are not a concern.
+ *
+ * Reads are clipped to the current file size; unmapped blocks (holes) read
+ * back as zeroes.  Returns the number of bytes read (0 when @off is past
+ * the end of the file), the error from jfs_get_block(), or -EIO when a
+ * block cannot be read.
+ */
+static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
+                              size_t len, loff_t off)
+{
+        struct inode *quota_inode = sb_dqopt(sb)->files[type];
+        loff_t file_size = i_size_read(quota_inode);
+        sector_t blk = off >> sb->s_blocksize_bits;
+        int in_block = off & (sb->s_blocksize - 1);
+        struct buffer_head map_bh;
+        size_t remaining;
+        if (off > file_size)
+                return 0;
+        if (off+len > file_size)
+                len = file_size-off;
+        for (remaining = len; remaining > 0; blk++) {
+                struct buffer_head *bh;
+                int rc, chunk;
+                /* Copy up to the end of this block, or whatever is left. */
+                chunk = sb->s_blocksize - in_block < remaining ?
+                                sb->s_blocksize - in_block : remaining;
+                map_bh.b_state = 0;
+                map_bh.b_size = 1 << quota_inode->i_blkbits;
+                rc = jfs_get_block(quota_inode, blk, &map_bh, 0);
+                if (rc)
+                        return rc;
+                if (buffer_mapped(&map_bh)) {
+                        bh = sb_bread(sb, map_bh.b_blocknr);
+                        if (!bh)
+                                return -EIO;
+                        memcpy(data, bh->b_data+in_block, chunk);
+                        brelse(bh);
+                } else {
+                        /* A hole? */
+                        memset(data, 0, chunk);
+                }
+                /* Only the first block can start mid-block. */
+                in_block = 0;
+                remaining -= chunk;
+                data += chunk;
+        }
+        return len;
+}
+/*
+ * Write @len bytes from @data to the quota file of the given @type,
+ * starting at offset @off.
+ *
+ * Blocks are mapped (and allocated if needed) through jfs_get_block() and
+ * written via the buffer cache under inode->i_mutex.  A block that is only
+ * partly overwritten is read first so that its remaining bytes survive;
+ * a fully overwritten block is just grabbed with sb_getblk().
+ *
+ * Returns the number of bytes written.  If an error occurs before anything
+ * was written, the error code is returned instead; after a partial write
+ * the short count is returned and the inode size, version and timestamps
+ * are still updated.
+ */
+static ssize_t jfs_quota_write(struct super_block *sb, int type,
+                               const char *data, size_t len, loff_t off)
+{
+        struct inode *inode = sb_dqopt(sb)->files[type];
+        sector_t blk = off >> sb->s_blocksize_bits;
+        int err = 0;
+        int offset = off & (sb->s_blocksize - 1);
+        int tocopy;
+        size_t towrite = len;
+        struct buffer_head tmp_bh;
+        struct buffer_head *bh;
+        mutex_lock(&inode->i_mutex);
+        while (towrite > 0) {
+                tocopy = sb->s_blocksize - offset < towrite ?
+                                sb->s_blocksize - offset : towrite;
+                tmp_bh.b_state = 0;
+                tmp_bh.b_size = 1 << inode->i_blkbits;
+                err = jfs_get_block(inode, blk, &tmp_bh, 1);
+                if (err)
+                        goto out;
+                if (offset || tocopy != sb->s_blocksize)
+                        bh = sb_bread(sb, tmp_bh.b_blocknr);
+                else
+                        bh = sb_getblk(sb, tmp_bh.b_blocknr);
+                if (!bh) {
+                        err = -EIO;
+                        goto out;
+                }
+                lock_buffer(bh);
+                memcpy(bh->b_data+offset, data, tocopy);
+                flush_dcache_page(bh->b_page);
+                set_buffer_uptodate(bh);
+                mark_buffer_dirty(bh);
+                unlock_buffer(bh);
+                brelse(bh);
+                offset = 0;
+                towrite -= tocopy;
+                data += tocopy;
+                blk++;
+        }
+out:
+        if (len == towrite) {
+                /* Nothing was written: drop i_mutex before bailing out. */
+                mutex_unlock(&inode->i_mutex);
+                return err;
+        }
+        if (inode->i_size < off+len-towrite)
+                i_size_write(inode, off+len-towrite);
+        inode->i_version++;
+        inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+        mark_inode_dirty(inode);
+        mutex_unlock(&inode->i_mutex);
+        return len - towrite;
+}
+#endif
 static struct super_operations jfs_super_operations = {
        .alloc_inode    = jfs_alloc_inode,
        .destroy_inode  = jfs_destroy_inode,
@@ -621,7 +729,11 @@ static struct super_operations jfs_super_operations = {
        .unlockfs       = jfs_unlockfs,
        .statfs         = jfs_statfs,
        .remount_fs     = jfs_remount,
-        .show_options   = jfs_show_options
+        .show_options   = jfs_show_options,
+#ifdef CONFIG_QUOTA
+        .quota_read     = jfs_quota_read,
+        .quota_write    = jfs_quota_write,
+#endif
 };
 static struct export_operations jfs_export_operations = {
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 2a4df9b3779a..01b4db9e5466 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -237,19 +237,22 @@ static int
 nlm_traverse_files(struct nlm_host *host, int action)
 {
        struct nlm_file *file, **fp;
-        int             i;
+        int i, ret = 0;
        mutex_lock(&nlm_file_mutex);
        for (i = 0; i < FILE_NRHASH; i++) {
                fp = nlm_files + i;
                while ((file = *fp) != NULL) {
+                        file->f_count++;
+                        mutex_unlock(&nlm_file_mutex);
                        /* Traverse locks, blocks and shares of this file
                         * and update file->f_locks count */
-                        if (nlm_inspect_file(host, file, action)) {
+                        if (nlm_inspect_file(host, file, action))
-                                mutex_unlock(&nlm_file_mutex);
+                                ret = 1;
-                                return 1;
-                        }
+                        mutex_lock(&nlm_file_mutex);
+                        file->f_count--;
                        /* No more references to this file. Let go of it. */
                        if (!file->f_blocks && !file->f_locks
                         && !file->f_shares && !file->f_count) {
@@ -262,7 +265,7 @@ nlm_traverse_files(struct nlm_host *host, int action)
                }
        }
        mutex_unlock(&nlm_file_mutex);
-        return 0;
+        return ret;
 }
 /*
diff --git a/fs/locks.c b/fs/locks.c
index b0b41a64e10b..d7c53392cac1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1421,8 +1421,9 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
        if (!leases_enable)
                goto out;
-        error = lease_alloc(filp, arg, &fl);
+        error = -ENOMEM;
-        if (error)
+        fl = locks_alloc_lock();
+        if (fl == NULL)
                goto out;
        locks_copy_lock(fl, lease);
@@ -1430,6 +1431,7 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
        locks_insert_lock(before, fl);
        *flp = fl;
+        error = 0;
 out:
        return error;
 }
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 9ea91c5eeb7b..330ff9fc7cf0 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -204,6 +204,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
        /*
         * Allocate the buffer map to keep the superblock small.
         */
+        if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+                goto out_illegal_sb;
        i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
        map = kmalloc(i, GFP_KERNEL);
        if (!map)
@@ -263,7 +265,7 @@ out_no_root:
 out_no_bitmap:
        printk("MINIX-fs: bad superblock or unable to read bitmaps\n");
-    out_freemap:
+out_freemap:
        for (i = 0; i < sbi->s_imap_blocks; i++)
                brelse(sbi->s_imap[i]);
        for (i = 0; i < sbi->s_zmap_blocks; i++)
@@ -276,11 +278,16 @@ out_no_map:
                printk("MINIX-fs: can't allocate map\n");
        goto out_release;
+out_illegal_sb:
+        if (!silent)
+                printk("MINIX-fs: bad superblock\n");
+        goto out_release;
 out_no_fs:
        if (!silent)
                printk("VFS: Can't find a Minix or Minix V2 filesystem "
                        "on device %s\n", s->s_id);
-    out_release:
+out_release:
        brelse(bh);
        goto out;
@@ -290,7 +297,7 @@ out_bad_hblock:
 out_bad_sb:
        printk("MINIX-fs: unable to read superblock\n");
- out:
+out:
        s->s_fs_info = NULL;
        kfree(sbi);
        return -EINVAL;
diff --git a/fs/namei.c b/fs/namei.c
index 55a131230f94..432d6bc6fab0 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -227,10 +227,10 @@ int generic_permission(struct inode *inode, int mask,
 int permission(struct inode *inode, int mask, struct nameidata *nd)
 {
+        umode_t mode = inode->i_mode;
        int retval, submask;
        if (mask & MAY_WRITE) {
-                umode_t mode = inode->i_mode;
                /*
                 * Nobody gets write access to a read-only fs.
@@ -247,6 +247,13 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
        }
+        /*
+         * MAY_EXEC on regular files requires special handling: We override
+         * filesystem execute permissions if the mode bits aren't set.
+         */
+        if ((mask & MAY_EXEC) && S_ISREG(mode) && !(mode & S_IXUGO))
+                return -EACCES;
        /* Ordinary permission routines do not understand MAY_APPEND. */
        submask = mask & ~MAY_APPEND;
        if (inode->i_op && inode->i_op->permission)
@@ -1767,6 +1774,8 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
        if (nd->last_type != LAST_NORM)
                goto fail;
        nd->flags &= ~LOOKUP_PARENT;
+        nd->flags |= LOOKUP_CREATE;
+        nd->intent.open.flags = O_EXCL;
        /*
         * Do the final lookup.
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index cc2b874ad5a4..48e892880d5b 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -312,7 +312,13 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-        return !nfs_wb_page(page->mapping->host, page);
+        if (gfp & __GFP_FS)
+                return !nfs_wb_page(page->mapping->host, page);
+        else
+                /*
+                 * Avoid deadlock on nfs_wait_on_request().
+                 */
+                return 0;
 }
 const struct address_space_operations nfs_file_aops = {
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index b81e7ed3c902..07a5dd57646e 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -130,9 +130,7 @@ nfs_idmap_delete(struct nfs4_client *clp)
        if (!idmap)
                return;
-        dput(idmap->idmap_dentry);
+        rpc_unlink(idmap->idmap_dentry);
-        idmap->idmap_dentry = NULL;
-        rpc_unlink(idmap->idmap_path);
        clp->cl_idmap = NULL;
        kfree(idmap);
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e6ee97f19d81..153898e1331f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2668,7 +2668,7 @@ out:
        nfs4_set_cached_acl(inode, acl);
 }
-static inline ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
 {
        struct page *pages[NFS4ACL_MAXPAGES];
        struct nfs_getaclargs args = {
@@ -2721,6 +2721,19 @@ out_free:
        return ret;
 }
+static ssize_t nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
+{
+        struct nfs4_exception exception = { };
+        ssize_t ret;
+        do {
+                ret = __nfs4_get_acl_uncached(inode, buf, buflen);
+                if (ret >= 0)
+                        break;
+                ret = nfs4_handle_exception(NFS_SERVER(inode), ret, &exception);
+        } while (exception.retry);
+        return ret;
+}
 static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 {
        struct nfs_server *server = NFS_SERVER(inode);
@@ -2737,7 +2750,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
        return nfs4_get_acl_uncached(inode, buf, buflen);
 }
-static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
 {
        struct nfs_server *server = NFS_SERVER(inode);
        struct page *pages[NFS4ACL_MAXPAGES];
@@ -2763,6 +2776,18 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen
        return ret;
 }
+static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen)
+{
+        struct nfs4_exception exception = { };
+        int err;
+        do {
+                err = nfs4_handle_exception(NFS_SERVER(inode),
+                                __nfs4_proc_set_acl(inode, buf, buflen),
+                                &exception);
+        } while (exception.retry);
+        return err;
+}
 static int
 nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 1750d996f49f..730ec8fb31c6 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3355,7 +3355,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
        struct kvec     *iov = rcvbuf->head;
        unsigned int    nr, pglen = rcvbuf->page_len;
        uint32_t        *end, *entry, *p, *kaddr;
-        uint32_t        len, attrlen;
+        uint32_t        len, attrlen, xlen;
        int             hdrlen, recvd, status;
        status = decode_op_hdr(xdr, OP_READDIR);
@@ -3377,10 +3377,10 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
        BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE);
        kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0);
-        end = (uint32_t *) ((char *)p + pglen + readdir->pgbase);
+        end = p + ((pglen + readdir->pgbase) >> 2);
        entry = p;
        for (nr = 0; *p++; nr++) {
-                if (p + 3 > end)
+                if (end - p < 3)
                        goto short_pkt;
                dprintk("cookie = %Lu, ", *((unsigned long long *)p));
                p += 2;                 /* cookie */
@@ -3389,18 +3389,19 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
                        printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len);
                        goto err_unmap;
                }
-                dprintk("filename = %*s\n", len, (char *)p);
+                xlen = XDR_QUADLEN(len);
-                p += XDR_QUADLEN(len);
+                if (end - p < xlen + 1)
-                if (p + 1 > end)
                        goto short_pkt;
+                dprintk("filename = %*s\n", len, (char *)p);
+                p += xlen;
                len = ntohl(*p++);      /* bitmap length */
-                p += len;
+                if (end - p < len + 1)
-                if (p + 1 > end)
                        goto short_pkt;
+                p += len;
                attrlen = XDR_QUADLEN(ntohl(*p++));
-                p += attrlen;           /* attributes */
+                if (end - p < attrlen + 2)
-                if (p + 2 > end)
                        goto short_pkt;
+                p += attrlen;           /* attributes */
                entry = p;
        }
        if (!nr && (entry[0] != 0 || entry[1] == 0))
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 65c0c5b32351..da9cf11c326f 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -116,10 +116,17 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
        pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
        base &= ~PAGE_CACHE_MASK;
        pglen = PAGE_CACHE_SIZE - base;
-        if (pglen < remainder)
+        for (;;) {
+                if (remainder <= pglen) {
+                        memclear_highpage_flush(*pages, base, remainder);
+                        break;
+                }
                memclear_highpage_flush(*pages, base, pglen);
-        else
+                pages++;
-                memclear_highpage_flush(*pages, base, remainder);
+                remainder -= pglen;
+                pglen = PAGE_CACHE_SIZE;
+                base = 0;
+        }
 }
 /*
@@ -476,6 +483,8 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
        unsigned int base = data->args.pgbase;
        struct page **pages;
+        if (data->res.eof)
+                count = data->args.count;
        if (unlikely(count == 0))
                return;
        pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
@@ -483,11 +492,7 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
        count += base;
        for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
                SetPageUptodate(*pages);
-        /*
+        if (count != 0)
-         * Was this an eof or a short read? If the latter, don't mark the page
-         * as uptodate yet.
-         */
-        if (count > 0 && (data->res.eof || data->args.count == data->res.count))
                SetPageUptodate(*pages);
 }
@@ -502,6 +507,8 @@ static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
        count += base;
        for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
                SetPageError(*pages);
+        if (count != 0)
+                SetPageError(*pages);
 }
 /*
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 1b8346dd0572..9503240ef0e5 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2375,7 +2375,6 @@ leave:
        mlog(0, "returning %d\n", ret);
        return ret;
 }
-EXPORT_SYMBOL_GPL(dlm_migrate_lockres);
 int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock)
 {
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index b0c3134f4f70..37be4b2e0d4a 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -155,7 +155,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
        else
                status = dlm_get_unlock_actions(dlm, res, lock, lksb, &actions);
-        if (status != DLM_NORMAL)
+        if (status != DLM_NORMAL && (status != DLM_CANCELGRANT || !master_node))
                goto leave;
        /* By now this has been masked out of cancel requests. */
@@ -183,8 +183,7 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
                spin_lock(&lock->spinlock);
                /* if the master told us the lock was already granted,
                 * let the ast handle all of these actions */
-                if (status == DLM_NORMAL &&
+                if (status == DLM_CANCELGRANT) {
-                    lksb->status == DLM_CANCELGRANT) {
                        actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
                                     DLM_UNLOCK_REGRANT_LOCK|
                                     DLM_UNLOCK_CLEAR_CONVERT_TYPE);
@@ -349,14 +348,9 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
                                        vec, veclen, owner, &status);
        if (tmpret >= 0) {
                // successfully sent and received
-                if (status == DLM_CANCELGRANT)
+                if (status == DLM_FORWARD)
-                        ret = DLM_NORMAL;
-                else if (status == DLM_FORWARD) {
                        mlog(0, "master was in-progress.  retry\n");
-                        ret = DLM_FORWARD;
+                ret = status;
-                } else
-                        ret = status;
-                lksb->status = status;
        } else {
                mlog_errno(tmpret);
                if (dlm_is_host_down(tmpret)) {
@@ -372,7 +366,6 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
                        /* something bad.  this will BUG in ocfs2 */
                        ret = dlm_err_to_dlm_status(tmpret);
                }
-                lksb->status = ret;
        }
        return ret;
@@ -483,6 +476,10 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data)
        /* lock was found on queue */
        lksb = lock->lksb;
+        if (flags & (LKM_VALBLK|LKM_PUT_LVB) &&
+            lock->ml.type != LKM_EXMODE)
+                flags &= ~(LKM_VALBLK|LKM_PUT_LVB);
        /* unlockast only called on originating node */
        if (flags & LKM_PUT_LVB) {
                lksb->flags |= DLM_LKSB_PUT_LVB;
@@ -507,11 +504,8 @@ not_found:
                               "cookie=%u:%llu\n",
                               dlm_get_lock_cookie_node(unlock->cookie),
                               dlm_get_lock_cookie_seq(unlock->cookie));
-        else {
+        else
-                /* send the lksb->status back to the other node */
-                status = lksb->status;
                dlm_lock_put(lock);
-        }
 leave:
        if (res)
@@ -533,26 +527,22 @@ static enum dlm_status dlm_get_cancel_actions(struct dlm_ctxt *dlm,
        if (dlm_lock_on_list(&res->blocked, lock)) {
                /* cancel this outright */
-                lksb->status = DLM_NORMAL;
                status = DLM_NORMAL;
                *actions = (DLM_UNLOCK_CALL_AST |
                            DLM_UNLOCK_REMOVE_LOCK);
        } else if (dlm_lock_on_list(&res->converting, lock)) {
                /* cancel the request, put back on granted */
-                lksb->status = DLM_NORMAL;
                status = DLM_NORMAL;
                *actions = (DLM_UNLOCK_CALL_AST |
                            DLM_UNLOCK_REMOVE_LOCK |
                            DLM_UNLOCK_REGRANT_LOCK |
                            DLM_UNLOCK_CLEAR_CONVERT_TYPE);
        } else if (dlm_lock_on_list(&res->granted, lock)) {
-                /* too late, already granted.  DLM_CANCELGRANT */
+                /* too late, already granted. */
-                lksb->status = DLM_CANCELGRANT;
+                status = DLM_CANCELGRANT;
-                status = DLM_NORMAL;
                *actions = DLM_UNLOCK_CALL_AST;
        } else {
                mlog(ML_ERROR, "lock to cancel is not on any list!\n");
-                lksb->status = DLM_IVLOCKID;
                status = DLM_IVLOCKID;
                *actions = 0;
        }
@@ -569,13 +559,11 @@ static enum dlm_status dlm_get_unlock_actions(struct dlm_ctxt *dlm,
        /* unlock request */
        if (!dlm_lock_on_list(&res->granted, lock)) {
-                lksb->status = DLM_DENIED;
                status = DLM_DENIED;
                dlm_error(status);
                *actions = 0;
        } else {
                /* unlock granted lock */
-                lksb->status = DLM_NORMAL;
                status = DLM_NORMAL;
                *actions = (DLM_UNLOCK_FREE_LOCK |
                            DLM_UNLOCK_CALL_AST |
@@ -632,6 +620,8 @@ retry:
        spin_lock(&res->spinlock);
        is_master = (res->owner == dlm->node_num);
+        if (flags & LKM_VALBLK && lock->ml.type != LKM_EXMODE)
+                flags &= ~LKM_VALBLK;
        spin_unlock(&res->spinlock);
        if (is_master) {
@@ -665,7 +655,7 @@ retry:
        }
        if (call_ast) {
-                mlog(0, "calling unlockast(%p, %d)\n", data, lksb->status);
+                mlog(0, "calling unlockast(%p, %d)\n", data, status);
                if (is_master) {
                        /* it is possible that there is one last bast 
                         * pending.  make sure it is flushed, then
@@ -677,9 +667,12 @@ retry:
                        wait_event(dlm->ast_wq, 
                                   dlm_lock_basts_flushed(dlm, lock));
                }
-                (*unlockast)(data, lksb->status);
+                (*unlockast)(data, status);
        }
+        if (status == DLM_CANCELGRANT)
+                status = DLM_NORMAL;
        if (status == DLM_NORMAL) {
                mlog(0, "kicking the thread\n");
                dlm_kick_thread(dlm, res);
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 0d1973ea32b0..1f17a4d08287 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -840,6 +840,12 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
        mlog(0, "Allocating %u clusters for a new window.\n",
             ocfs2_local_alloc_window_bits(osb));
+        /* Instruct the allocation code to try the most recently used
+         * cluster group. We'll re-record the group used this pass
+         * below. */
+        ac->ac_last_group = osb->la_last_gd;
        /* we used the generic suballoc reserve function, but we set
         * everything up nicely, so there's no reason why we can't use
         * the more specific cluster api to claim bits. */
@@ -852,6 +858,8 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
                goto bail;
        }
+        osb->la_last_gd = ac->ac_last_group;
        la->la_bm_off = cpu_to_le32(cluster_off);
        alloc->id1.bitmap1.i_total = cpu_to_le32(cluster_count);
        /* just in case... In the future when we find space ourselves,
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index cd4a6f253d13..0462a7f4e21b 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -197,7 +197,6 @@ struct ocfs2_super
        struct ocfs2_node_map recovery_map;
        struct ocfs2_node_map umount_map;
-        u32 num_clusters;
        u64 root_blkno;
        u64 system_dir_blkno;
        u64 bitmap_blkno;
@@ -237,6 +236,7 @@ struct ocfs2_super
        enum ocfs2_local_alloc_state local_alloc_state;
        struct buffer_head *local_alloc_bh;
+        u64 la_last_gd;
        /* Next two fields are for local node slot recovery during
         * mount. */
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 195523090c87..9d91e66f51a9 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -70,12 +70,6 @@ static int ocfs2_block_group_search(struct inode *inode,
                                    struct buffer_head *group_bh,
                                    u32 bits_wanted, u32 min_bits,
                                    u16 *bit_off, u16 *bits_found);
-static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
-                              u32 bits_wanted,
-                              u32 min_bits,
-                              u16 *bit_off,
-                              unsigned int *num_bits,
-                              u64 *bg_blkno);
 static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                                     struct ocfs2_alloc_context *ac,
                                     u32 bits_wanted,
@@ -85,11 +79,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                                     u64 *bg_blkno);
 static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
                                         int nr);
-static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
-                                             struct buffer_head *bg_bh,
-                                             unsigned int bits_wanted,
-                                             u16 *bit_off,
-                                             u16 *bits_found);
 static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle,
                                             struct inode *alloc_inode,
                                             struct ocfs2_group_desc *bg,
@@ -143,6 +132,64 @@ static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
        return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
 }
+/* somewhat more expensive than our other checks, so use sparingly. */
+static int ocfs2_check_group_descriptor(struct super_block *sb,
+                                        struct ocfs2_dinode *di,
+                                        struct ocfs2_group_desc *gd)
+{
+        unsigned int max_bits;
+        if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
+                OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
+                return -EIO;
+        }
+        if (di->i_blkno != gd->bg_parent_dinode) {
+                ocfs2_error(sb, "Group descriptor # %llu has bad parent "
+                            "pointer (%llu, expected %llu)",
+                            (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                            (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
+                            (unsigned long long)le64_to_cpu(di->i_blkno));
+                return -EIO;
+        }
+        max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
+        if (le16_to_cpu(gd->bg_bits) > max_bits) {
+                ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
+                            (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                            le16_to_cpu(gd->bg_bits));
+                return -EIO;
+        }
+        if (le16_to_cpu(gd->bg_chain) >=
+            le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
+                ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
+                            (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                            le16_to_cpu(gd->bg_chain));
+                return -EIO;
+        }
+        if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
+                ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+                            "claims that %u are free",
+                            (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                            le16_to_cpu(gd->bg_bits),
+                            le16_to_cpu(gd->bg_free_bits_count));
+                return -EIO;
+        }
+        if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
+                ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
+                            "max bitmap bits of %u",
+                            (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                            le16_to_cpu(gd->bg_bits),
+                            8 * le16_to_cpu(gd->bg_size));
+                return -EIO;
+        }
+        return 0;
+}
 static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle,
                                  struct inode *alloc_inode,
                                  struct buffer_head *bg_bh,
@@ -663,6 +710,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
 static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
                                             struct buffer_head *bg_bh,
                                             unsigned int bits_wanted,
+                                             unsigned int total_bits,
                                             u16 *bit_off,
                                             u16 *bits_found)
 {
@@ -679,10 +727,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
        found = start = best_offset = best_size = 0;
        bitmap = bg->bg_bitmap;
-        while((offset = ocfs2_find_next_zero_bit(bitmap,
+        while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
-                                                 le16_to_cpu(bg->bg_bits),
+                if (offset == total_bits)
-                                                 start)) != -1) {
-                if (offset == le16_to_cpu(bg->bg_bits))
                        break;
                if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
@@ -911,14 +957,35 @@ static int ocfs2_cluster_group_search(struct inode *inode,
 {
        int search = -ENOSPC;
        int ret;
-        struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
+        struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
        u16 tmp_off, tmp_found;
+        unsigned int max_bits, gd_cluster_off;
        BUG_ON(!ocfs2_is_cluster_bitmap(inode));
-        if (bg->bg_free_bits_count) {
+        if (gd->bg_free_bits_count) {
+                max_bits = le16_to_cpu(gd->bg_bits);
+                /* Tail groups in cluster bitmaps which aren't cpg
+                 * aligned are prone to partial extension by a failed
+                 * fs resize. If the file system resize never got to
+                 * update the dinode cluster count, then we don't want
+                 * to trust any clusters past it, regardless of what
+                 * the group descriptor says. */
+                gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
+                                                          le64_to_cpu(gd->bg_blkno));
+                if ((gd_cluster_off + max_bits) >
+                    OCFS2_I(inode)->ip_clusters) {
+                        max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
+                        mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
+                             (unsigned long long)le64_to_cpu(gd->bg_blkno),
+                             le16_to_cpu(gd->bg_bits),
+                             OCFS2_I(inode)->ip_clusters, max_bits);
+                }
                ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
                                                        group_bh, bits_wanted,
+                                                        max_bits,
                                                        &tmp_off, &tmp_found);
                if (ret)
                        return ret;
@@ -951,17 +1018,109 @@ static int ocfs2_block_group_search(struct inode *inode,
        if (bg->bg_free_bits_count)
                ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
                                                        group_bh, bits_wanted,
+                                                        le16_to_cpu(bg->bg_bits),
                                                        bit_off, bits_found);
        return ret;
 }
+static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+                                       struct ocfs2_journal_handle *handle,
+                                       struct buffer_head *di_bh,
+                                       u32 num_bits,
+                                       u16 chain)
+{
+        int ret;
+        u32 tmp_used;
+        struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
+        struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
+        ret = ocfs2_journal_access(handle, inode, di_bh,
+                                   OCFS2_JOURNAL_ACCESS_WRITE);
+        if (ret < 0) {
+                mlog_errno(ret);
+                goto out;
+        }
+        tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
+        di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
+        le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
+        ret = ocfs2_journal_dirty(handle, di_bh);
+        if (ret < 0)
+                mlog_errno(ret);
+out:
+        return ret;
+}
+static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
+                                  u32 bits_wanted,
+                                  u32 min_bits,
+                                  u16 *bit_off,
+                                  unsigned int *num_bits,
+                                  u64 gd_blkno,
+                                  u16 *bits_left)
+{
+        int ret;
+        u16 found;
+        struct buffer_head *group_bh = NULL;
+        struct ocfs2_group_desc *gd;
+        struct inode *alloc_inode = ac->ac_inode;
+        struct ocfs2_journal_handle *handle = ac->ac_handle;
+        ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
+                               &group_bh, OCFS2_BH_CACHED, alloc_inode);
+        if (ret < 0) {
+                mlog_errno(ret);
+                return ret;
+        }
+        gd = (struct ocfs2_group_desc *) group_bh->b_data;
+        if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
+                OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
+                ret = -EIO;
+                goto out;
+        }
+        ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
+                                  bit_off, &found);
+        if (ret < 0) {
+                if (ret != -ENOSPC)
+                        mlog_errno(ret);
+                goto out;
+        }
+        *num_bits = found;
+        ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
+                                               *num_bits,
+                                               le16_to_cpu(gd->bg_chain));
+        if (ret < 0) {
+                mlog_errno(ret);
+                goto out;
+        }
+        ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
+                                         *bit_off, *num_bits);
+        if (ret < 0)
+                mlog_errno(ret);
+        *bits_left = le16_to_cpu(gd->bg_free_bits_count);
+out:
+        brelse(group_bh);
+        return ret;
+}
 static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
                              u32 bits_wanted,
                              u32 min_bits,
                              u16 *bit_off,
                              unsigned int *num_bits,
-                              u64 *bg_blkno)
+                              u64 *bg_blkno,
+                              u16 *bits_left)
 {
        int status;
        u16 chain, tmp_bits;
@@ -988,9 +1147,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
                goto bail;
        }
        bg = (struct ocfs2_group_desc *) group_bh->b_data;
-        if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
+        status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
-                OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
+        if (status) {
-                status = -EIO;
+                mlog_errno(status);
                goto bail;
        }
@@ -1018,9 +1177,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
                        goto bail;
                }
                bg = (struct ocfs2_group_desc *) group_bh->b_data;
-                if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
+                status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
-                        OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
+                if (status) {
-                        status = -EIO;
+                        mlog_errno(status);
                        goto bail;
                }
        }
@@ -1099,6 +1258,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
             (unsigned long long)fe->i_blkno);
        *bg_blkno = le64_to_cpu(bg->bg_blkno);
+        *bits_left = le16_to_cpu(bg->bg_free_bits_count);
 bail:
        if (group_bh)
                brelse(group_bh);
@@ -1120,6 +1280,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
 {
        int status;
        u16 victim, i;
+        u16 bits_left = 0;
+        u64 hint_blkno = ac->ac_last_group;
        struct ocfs2_chain_list *cl;
        struct ocfs2_dinode *fe;
@@ -1146,6 +1308,28 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                goto bail;
        }
+        if (hint_blkno) {
+                /* Attempt to short-circuit the usual search mechanism
+                 * by jumping straight to the most recently used
+                 * allocation group. This helps us maintain some
+                 * contiguousness across allocations. */
+                status = ocfs2_search_one_group(ac, bits_wanted, min_bits,
+                                                bit_off, num_bits,
+                                                hint_blkno, &bits_left);
+                if (!status) {
+                        /* Be careful to update *bg_blkno here as the
+                         * caller is expecting it to be filled in, and
+                         * ocfs2_search_one_group() won't do that for
+                         * us. */
+                        *bg_blkno = hint_blkno;
+                        goto set_hint;
+                }
+                if (status < 0 && status != -ENOSPC) {
+                        mlog_errno(status);
+                        goto bail;
+                }
+        }
        cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
        victim = ocfs2_find_victim_chain(cl);
@@ -1153,9 +1337,9 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
        ac->ac_allow_chain_relink = 1;
        status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off,
-                                    num_bits, bg_blkno);
+                                    num_bits, bg_blkno, &bits_left);
        if (!status)
-                goto bail;
+                goto set_hint;
        if (status < 0 && status != -ENOSPC) {
                mlog_errno(status);
                goto bail;
@@ -1177,8 +1361,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                ac->ac_chain = i;
                status = ocfs2_search_chain(ac, bits_wanted, min_bits,
-                                            bit_off, num_bits,
+                                            bit_off, num_bits, bg_blkno,
-                                            bg_blkno);
+                                            &bits_left);
                if (!status)
                        break;
                if (status < 0 && status != -ENOSPC) {
@@ -1186,8 +1370,19 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
                        goto bail;
                }
        }
-bail:
+set_hint:
+        if (status != -ENOSPC) {
+                /* If the next search of this group is not likely to
+                 * yield a suitable extent, then we reset the last
+                 * group hint so as to not waste a disk read */
+                if (bits_left < min_bits)
+                        ac->ac_last_group = 0;
+                else
+                        ac->ac_last_group = *bg_blkno;
+        }
+bail:
        mlog_exit(status);
        return status;
 }
@@ -1341,7 +1536,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
 {
        int status;
        unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
-        u64 bg_blkno;
+        u64 bg_blkno = 0;
        u16 bg_bit_off;
        mlog_entry_void();
@@ -1494,9 +1689,9 @@ static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle,
        }
        group = (struct ocfs2_group_desc *) group_bh->b_data;
-        if (!OCFS2_IS_VALID_GROUP_DESC(group)) {
+        status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
-                OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, group);
+        if (status) {
-                status = -EIO;
+                mlog_errno(status);
                goto bail;
        }
        BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a76c82a7ceac..c787838d1052 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -49,6 +49,8 @@ struct ocfs2_alloc_context {
        u16    ac_chain;
        int    ac_allow_chain_relink;
        group_search_t *ac_group_search;
+        u64    ac_last_group;
 };
 void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 382706a67ffd..d17e33e66a1e 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1442,8 +1442,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
        osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
+        /* We don't have a cluster lock on the bitmap here because
+         * we're only interested in static information and the extra
+         * complexity at mount time isn't worth it. Don't pass the
+         * inode in to the read function though as we don't want it to
+         * be put in the cache. */
        status = ocfs2_read_block(osb, osb->bitmap_blkno, &bitmap_bh, 0,
-                                  inode);
+                                  NULL);
        iput(inode);
        if (status < 0) {
                mlog_errno(status);
@@ -1452,7 +1457,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
        di = (struct ocfs2_dinode *) bitmap_bh->b_data;
        osb->bitmap_cpg = le16_to_cpu(di->id2.i_chain.cl_cpg);
-        osb->num_clusters = le32_to_cpu(di->id1.bitmap1.i_total);
        brelse(bitmap_bh);
        mlog(0, "cluster bitmap inode: %llu, clusters per group: %u\n",
             (unsigned long long)osb->bitmap_blkno, osb->bitmap_cpg);
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
index abe91ca03edf..0a5927c806ca 100644
--- a/fs/partitions/sun.c
+++ b/fs/partitions/sun.c
@@ -74,7 +74,7 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
        spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect);
        for (i = 0; i < 8; i++, p++) {
                unsigned long st_sector;
-                int num_sectors;
+                unsigned int num_sectors;
                st_sector = be32_to_cpu(p->start_cylinder) * spc;
                num_sectors = be32_to_cpu(p->num_sectors);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 9f2cfc30f9cf..942156225447 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -169,7 +169,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
                "Mapped:       %8lu kB\n"
                "Slab:         %8lu kB\n"
                "PageTables:   %8lu kB\n"
-                "NFS Unstable: %8lu kB\n"
+                "NFS_Unstable: %8lu kB\n"
                "Bounce:       %8lu kB\n"
                "CommitLimit:  %8lu kB\n"
                "Committed_AS: %8lu kB\n"
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 39fedaa88a0c..d935fb9394e3 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -424,7 +424,7 @@ int xattr_readdir(struct file *file, filldir_t filler, void *buf)
        int res = -ENOTDIR;
        if (!file->f_op || !file->f_op->readdir)
                goto out;
-        mutex_lock(&inode->i_mutex);
+        mutex_lock_nested(&inode->i_mutex, I_MUTEX_XATTR);
 //        down(&inode->i_zombie);
        res = -ENOENT;
        if (!IS_DEADDIR(inode)) {
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 4df822c881b6..fcce1a21a51b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -115,6 +115,13 @@ static struct inode *udf_alloc_inode(struct super_block *sb)
        ei = (struct udf_inode_info *)kmem_cache_alloc(udf_inode_cachep, SLAB_KERNEL);
        if (!ei)
                return NULL;
+        ei->i_unique = 0;
+        ei->i_lenExtents = 0;
+        ei->i_next_alloc_block = 0;
+        ei->i_next_alloc_goal = 0;
+        ei->i_strat4096 = 0;
        return &ei->vfs_inode;
 }
@@ -1652,7 +1659,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
                iput(inode);
                goto error_out;
        }
-        sb->s_maxbytes = MAX_LFS_FILESIZE;
+        sb->s_maxbytes = 1<<30;
        return 0;
 error_out:
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index e1b0e8cfecb4..0abd66ce36ea 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -239,37 +239,51 @@ void udf_truncate_extents(struct inode * inode)
        {
                if (offset)
                {
-                        extoffset -= adsize;
+                        /*
-                        etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1);
+                         *  OK, there is no extent covering inode->i_size and
-                        if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))
+                         *  no extent above inode->i_size => truncate is
-                        {
+                         *  extending the file by 'offset'.
-                                extoffset -= adsize;
+                         */
-                                elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset);
+                        if ((!bh && extoffset == udf_file_entry_alloc_offset(inode)) ||
-                                udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0);
+                            (bh && extoffset == sizeof(struct allocExtDesc))) {
+                                /* File has no extents at all! */
+                                memset(&eloc, 0x00, sizeof(kernel_lb_addr));
+                                elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
+                                udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
                        }
-                        else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30))
+                        else {
-                        {
-                                kernel_lb_addr neloc = { 0, 0 };
                                extoffset -= adsize;
-                                nelen = EXT_NOT_RECORDED_NOT_ALLOCATED |
+                                etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1);
-                                        ((elen + offset + inode->i_sb->s_blocksize - 1) &
+                                if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))
-                                        ~(inode->i_sb->s_blocksize - 1));
+                                {
-                                udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1);
+                                        extoffset -= adsize;
-                                udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1);
+                                        elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset);
-                        }
+                                        udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0);
-                        else
+                                }
-                        {
+                                else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30))
-                                if (elen & (inode->i_sb->s_blocksize - 1))
                                {
+                                        kernel_lb_addr neloc = { 0, 0 };
                                        extoffset -= adsize;
-                                        elen = EXT_RECORDED_ALLOCATED |
+                                        nelen = EXT_NOT_RECORDED_NOT_ALLOCATED |
-                                                ((elen + inode->i_sb->s_blocksize - 1) &
+                                                ((elen + offset + inode->i_sb->s_blocksize - 1) &
                                                ~(inode->i_sb->s_blocksize - 1));
-                                        udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1);
+                                        udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1);
+                                        udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1);
+                                }
+                                else
+                                {
+                                        if (elen & (inode->i_sb->s_blocksize - 1))
+                                        {
+                                                extoffset -= adsize;
+                                                elen = EXT_RECORDED_ALLOCATED |
+                                                        ((elen + inode->i_sb->s_blocksize - 1) &
+                                                        ~(inode->i_sb->s_blocksize - 1));
+                                                udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1);
+                                        }
+                                        memset(&eloc, 0x00, sizeof(kernel_lb_addr));
+                                        elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
+                                        udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
                                }
-                                memset(&eloc, 0x00, sizeof(kernel_lb_addr));
-                                elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
-                                udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
                        }
                }
        }
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index e7c8615beb65..30c6e8a9446c 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -169,18 +169,20 @@ static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh)
 static struct buffer_head *
 ufs_clear_frags(struct inode *inode, sector_t beg,
-                unsigned int n)
+                unsigned int n, sector_t want)
 {
-        struct buffer_head *res, *bh;
+        struct buffer_head *res = NULL, *bh;
        sector_t end = beg + n;
-        res = sb_getblk(inode->i_sb, beg);
+        for (; beg < end; ++beg) {
-        ufs_clear_frag(inode, res);
-        for (++beg; beg < end; ++beg) {
                bh = sb_getblk(inode->i_sb, beg);
                ufs_clear_frag(inode, bh);
-                brelse(bh);
+                if (want != beg)
+                        brelse(bh);
+                else
+                        res = bh;
        }
+        BUG_ON(!res);
        return res;
 }
@@ -265,7 +267,9 @@ repeat:
                        lastfrag = ufsi->i_lastfrag;
                        
                }
-                goal = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]) + uspi->s_fpb;
+                tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]);
+                if (tmp)
+                        goal = tmp + uspi->s_fpb;
                tmp = ufs_new_fragments (inode, p, fragment - blockoff, 
                                         goal, required + blockoff,
                                         err, locked_page);
@@ -277,13 +281,15 @@ repeat:
                tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff),
                                        fs32_to_cpu(sb, *p), required +  (blockoff - lastblockoff),
                                        err, locked_page);
-        }
+        } else /* (lastblock > block) */ {
        /*
         * We will allocate new block before last allocated block
         */
-        else /* (lastblock > block) */ {
+                if (block) {
-                if (lastblock && (tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock-1])))
+                        tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[block-1]);
-                        goal = tmp + uspi->s_fpb;
+                        if (tmp)
+                                goal = tmp + uspi->s_fpb;
+                }
                tmp = ufs_new_fragments(inode, p, fragment - blockoff,
                                        goal, uspi->s_fpb, err, locked_page);
        }
@@ -296,7 +302,7 @@ repeat:
        }
        if (!phys) {
-                result = ufs_clear_frags(inode, tmp + blockoff, required);
+                result = ufs_clear_frags(inode, tmp, required, tmp + blockoff);
        } else {
                *phys = tmp + blockoff;
                result = NULL;
@@ -383,7 +389,7 @@ repeat:
                }
        }
-        if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1]) + uspi->s_fpb))
+        if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1])))
                goal = tmp + uspi->s_fpb;
        else
                goal = bh->b_blocknr + uspi->s_fpb;
@@ -397,7 +403,8 @@ repeat:
        if (!phys) {
-                result = ufs_clear_frags(inode, tmp + blockoff, uspi->s_fpb);
+                result = ufs_clear_frags(inode, tmp, uspi->s_fpb,
+                                         tmp + blockoff);
        } else {
                *phys = tmp + blockoff;
                *new = 1;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index c9b55872079b..ea11d04c41a0 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -375,17 +375,15 @@ static int ufs_alloc_lastblock(struct inode *inode)
        int err = 0;
        struct address_space *mapping = inode->i_mapping;
        struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
-        struct ufs_inode_info *ufsi = UFS_I(inode);
        unsigned lastfrag, i, end;
        struct page *lastpage;
        struct buffer_head *bh;
        lastfrag = (i_size_read(inode) + uspi->s_fsize - 1) >> uspi->s_fshift;
-        if (!lastfrag) {
+        if (!lastfrag)
-                ufsi->i_lastfrag = 0;
                goto out;
-        }
        lastfrag--;
        lastpage = ufs_get_locked_page(mapping, lastfrag >>
@@ -400,25 +398,25 @@ static int ufs_alloc_lastblock(struct inode *inode)
       for (i = 0; i < end; ++i)
               bh = bh->b_this_page;
-       if (!buffer_mapped(bh)) {
-               err = ufs_getfrag_block(inode, lastfrag, bh, 1);
+       err = ufs_getfrag_block(inode, lastfrag, bh, 1);
-               if (unlikely(err))
+       if (unlikely(err))
-                       goto out_unlock;
+               goto out_unlock;
-               if (buffer_new(bh)) {
+       if (buffer_new(bh)) {
-                       clear_buffer_new(bh);
+               clear_buffer_new(bh);
-                       unmap_underlying_metadata(bh->b_bdev,
+               unmap_underlying_metadata(bh->b_bdev,
-                                                 bh->b_blocknr);
+                                         bh->b_blocknr);
-                       /*
+               /*
-                        * we do not zeroize fragment, because of
+                * we do not zeroize fragment, because of
-                        * if it maped to hole, it already contains zeroes
+                * if it mapped to hole, it already contains zeroes
-                        */
+                */
-                       set_buffer_uptodate(bh);
+               set_buffer_uptodate(bh);
-                       mark_buffer_dirty(bh);
+               mark_buffer_dirty(bh);
-                       set_page_dirty(lastpage);
+               set_page_dirty(lastpage);
-               }
       }
 out_unlock:
       ufs_put_locked_page(lastpage);
 out:
@@ -440,23 +438,11 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
        if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
                return -EPERM;
-        if (inode->i_size > old_i_size) {
+        err = ufs_alloc_lastblock(inode);
-                /*
-                 * if we expand file we should care about
-                 * allocation of block for last byte first of all
-                 */
-                err = ufs_alloc_lastblock(inode);
-                if (err) {
+        if (err) {
-                        i_size_write(inode, old_i_size);
+                i_size_write(inode, old_i_size);
-                        goto out;
+                goto out;
-                }
-                /*
-                 * go away, because of we expand file, and we do not
-                 * need free blocks, and zeroizes page
-                 */
-                lock_kernel();
-                goto almost_end;
        }
        block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block);
@@ -477,21 +463,8 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
                yield();
        }
-        if (inode->i_size < old_i_size) {
-                /*
-                 * now we should have enough space
-                 * to allocate block for last byte
-                 */
-                err = ufs_alloc_lastblock(inode);
-                if (err)
-                        /*
-                         * looks like all the same - we have no space,
-                         * but we truncate file already
-                         */
-                        inode->i_size = (ufsi->i_lastfrag - 1) * uspi->s_fsize;
-        }
-almost_end:
        inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+        ufsi->i_lastfrag = DIRECT_FRAGMENT;
        unlock_kernel();
        mark_inode_dirty(inode);
 out:
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index eef6763f3a67..d2bbcd882a69 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1835,40 +1835,47 @@ xfs_alloc_fix_freelist(
                                &agbp)))
                        return error;
                if (!pag->pagf_init) {
+                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
+                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
                        args->agbp = NULL;
                        return 0;
                }
        } else
                agbp = NULL;
-        /* If this is a metadata preferred pag and we are user data
+        /*
+         * If this is a metadata preferred pag and we are user data
         * then try somewhere else if we are not being asked to
         * try harder at this point
         */
-        if (pag->pagf_metadata && args->userdata && flags) {
+        if (pag->pagf_metadata && args->userdata &&
+            (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
+                ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
                args->agbp = NULL;
                return 0;
        }
-        need = XFS_MIN_FREELIST_PAG(pag, mp);
+        if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
-        delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
+                need = XFS_MIN_FREELIST_PAG(pag, mp);
-        /*
+                delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0;
-         * If it looks like there isn't a long enough extent, or enough
+                /*
-         * total blocks, reject it.
+                 * If it looks like there isn't a long enough extent, or enough
-         */
+                 * total blocks, reject it.
-        longest = (pag->pagf_longest > delta) ?
+                 */
-                (pag->pagf_longest - delta) :
+                longest = (pag->pagf_longest > delta) ?
-                (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
+                        (pag->pagf_longest - delta) :
-        if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
+                        (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
-            (!(flags & XFS_ALLOC_FLAG_FREEING) &&
+                if ((args->minlen + args->alignment + args->minalignslop - 1) >
-             (int)(pag->pagf_freeblks + pag->pagf_flcount -
+                                longest ||
-                   need - args->total) <
+                    ((int)(pag->pagf_freeblks + pag->pagf_flcount -
-             (int)args->minleft)) {
+                           need - args->total) < (int)args->minleft)) {
-                if (agbp)
+                        if (agbp)
-                        xfs_trans_brelse(tp, agbp);
+                                xfs_trans_brelse(tp, agbp);
-                args->agbp = NULL;
+                        args->agbp = NULL;
-                return 0;
+                        return 0;
+                }
        }
        /*
         * Get the a.g. freespace buffer.
         * Can fail if we're not blocking on locks, and it's held.
@@ -1878,6 +1885,8 @@ xfs_alloc_fix_freelist(
                                &agbp)))
                        return error;
                if (agbp == NULL) {
+                        ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
+                        ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
                        args->agbp = NULL;
                        return 0;
                }
@@ -1887,22 +1896,24 @@ xfs_alloc_fix_freelist(
         */
        agf = XFS_BUF_TO_AGF(agbp);
        need = XFS_MIN_FREELIST(agf, mp);
-        delta = need > be32_to_cpu(agf->agf_flcount) ?
-                (need - be32_to_cpu(agf->agf_flcount)) : 0;
        /*
         * If there isn't enough total or single-extent, reject it.
         */
-        longest = be32_to_cpu(agf->agf_longest);
+        if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
-        longest = (longest > delta) ? (longest - delta) :
+                delta = need > be32_to_cpu(agf->agf_flcount) ?
-                (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
+                        (need - be32_to_cpu(agf->agf_flcount)) : 0;
-        if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
+                longest = be32_to_cpu(agf->agf_longest);
-             (!(flags & XFS_ALLOC_FLAG_FREEING) &&
+                longest = (longest > delta) ? (longest - delta) :
-                (int)(be32_to_cpu(agf->agf_freeblks) +
+                        (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
-                   be32_to_cpu(agf->agf_flcount) - need - args->total) <
+                if ((args->minlen + args->alignment + args->minalignslop - 1) >
-             (int)args->minleft)) {
+                                longest ||
-                xfs_trans_brelse(tp, agbp);
+                    ((int)(be32_to_cpu(agf->agf_freeblks) +
-                args->agbp = NULL;
+                     be32_to_cpu(agf->agf_flcount) - need - args->total) <
-                return 0;
+                                (int)args->minleft)) {
+                        xfs_trans_brelse(tp, agbp);
+                        args->agbp = NULL;
+                        return 0;
+                }
        }
        /*
         * Make the freelist shorter if it's too long.
@@ -1950,12 +1961,11 @@ xfs_alloc_fix_freelist(
                 * on a completely full ag.
                 */
                if (targs.agbno == NULLAGBLOCK) {
-                        if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
+                        if (flags & XFS_ALLOC_FLAG_FREEING)
-                                xfs_trans_brelse(tp, agflbp);
+                                break;
-                                args->agbp = NULL;
+                        xfs_trans_brelse(tp, agflbp);
-                                return 0;
+                        args->agbp = NULL;
-                        }
+                        return 0;
-                        break;
                }
                /*
                 * Put each allocated block on the list.
@@ -2442,31 +2452,26 @@ xfs_free_extent(
        xfs_fsblock_t   bno,    /* starting block number of extent */
        xfs_extlen_t    len)    /* length of extent */
 {
-#ifdef DEBUG
+        xfs_alloc_arg_t args;
-        xfs_agf_t       *agf;   /* a.g. freespace header */
-#endif
-        xfs_alloc_arg_t args;   /* allocation argument structure */
        int             error;
        ASSERT(len != 0);
+        memset(&args, 0, sizeof(xfs_alloc_arg_t));
        args.tp = tp;
        args.mp = tp->t_mountp;
        args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
        ASSERT(args.agno < args.mp->m_sb.sb_agcount);
        args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
-        args.alignment = 1;
-        args.minlen = args.minleft = args.minalignslop = 0;
        down_read(&args.mp->m_peraglock);
        args.pag = &args.mp->m_perag[args.agno];
        if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
                goto error0;
 #ifdef DEBUG
        ASSERT(args.agbp != NULL);
-        agf = XFS_BUF_TO_AGF(args.agbp);
+        ASSERT((args.agbno + len) <=
-        ASSERT(args.agbno + len <= be32_to_cpu(agf->agf_length));
+                be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
 #endif
-        error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno,
+        error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
-                len, 0);
 error0:
        up_read(&args.mp->m_peraglock);
        return error;