16 files changed, 321 insertions, 204 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 38f62680fd63..0e11e31dbb77 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -562,6 +562,7 @@ static inline void lock_kiocb(struct kiocb *iocb)
 static inline void unlock_kiocb(struct kiocb *iocb)
 {
        kiocbClearLocked(iocb);
+        smp_mb__after_clear_bit();
        wake_up_bit(&iocb->ki_flags, KIF_LOCKED);
 }
diff --git a/fs/compat.c b/fs/compat.c
index ac3fb9ed8eea..a719e158e002 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -44,6 +44,8 @@
 #include <linux/nfsd/syscall.h>
 #include <linux/personality.h>
 #include <linux/rwsem.h>
+#include <linux/acct.h>
+#include <linux/mm.h>
 #include <net/sock.h>           /* siocdevprivate_ioctl */
@@ -1487,6 +1489,8 @@ int compat_do_execve(char * filename,
                /* execve success */
                security_bprm_free(bprm);
+                acct_update_integrals(current);
+                update_mem_hiwater(current);
                kfree(bprm);
                return retval;
        }
diff --git a/fs/dcache.c b/fs/dcache.c
index 7376b61269fb..fb10386c59be 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -102,7 +102,8 @@ static inline void dentry_iput(struct dentry * dentry)
                list_del_init(&dentry->d_alias);
                spin_unlock(&dentry->d_lock);
                spin_unlock(&dcache_lock);
-                fsnotify_inoderemove(inode);
+                if (!inode->i_nlink)
+                        fsnotify_inoderemove(inode);
                if (dentry->d_op && dentry->d_op->d_iput)
                        dentry->d_op->d_iput(dentry, inode);
                else
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 6ab1dd0ca904..403b90a1213d 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -231,8 +231,9 @@ struct ep_pqueue {
 static void ep_poll_safewake_init(struct poll_safewake *psw);
 static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq);
-static int ep_getfd(int *efd, struct inode **einode, struct file **efile);
+static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
-static int ep_file_init(struct file *file);
+                    struct eventpoll *ep);
+static int ep_alloc(struct eventpoll **pep);
 static void ep_free(struct eventpoll *ep);
 static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd);
 static void ep_use_epitem(struct epitem *epi);
@@ -501,38 +502,37 @@ void eventpoll_release_file(struct file *file)
 asmlinkage long sys_epoll_create(int size)
 {
        int error, fd;
+        struct eventpoll *ep;
        struct inode *inode;
        struct file *file;
        DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
                     current, size));
-        /* Sanity check on the size parameter */
+        /*
+         * Sanity check on the size parameter, and create the internal data
+         * structure ( "struct eventpoll" ).
+         */
        error = -EINVAL;
-        if (size <= 0)
+        if (size <= 0 || (error = ep_alloc(&ep)) != 0)
                goto eexit_1;
        /*
         * Creates all the items needed to setup an eventpoll file. That is,
         * a file structure, and inode and a free file descriptor.
         */
-        error = ep_getfd(&fd, &inode, &file);
+        error = ep_getfd(&fd, &inode, &file, ep);
-        if (error)
-                goto eexit_1;
-        /* Setup the file internal data structure ( "struct eventpoll" ) */
-        error = ep_file_init(file);
        if (error)
                goto eexit_2;
        DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
                     current, size, fd));
        return fd;
 eexit_2:
-        sys_close(fd);
+        ep_free(ep);
+        kfree(ep);
 eexit_1:
        DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
                     current, size, error));
@@ -706,7 +706,8 @@ eexit_1:
 /*
 * Creates the file descriptor to be used by the epoll interface.
 */
-static int ep_getfd(int *efd, struct inode **einode, struct file **efile)
+static int ep_getfd(int *efd, struct inode **einode, struct file **efile,
+                    struct eventpoll *ep)
 {
        struct qstr this;
        char name[32];
@@ -756,7 +757,7 @@ static int ep_getfd(int *efd, struct inode **einode, struct file **efile)
        file->f_op = &eventpoll_fops;
        file->f_mode = FMODE_READ;
        file->f_version = 0;
-        file->private_data = NULL;
+        file->private_data = ep;
        /* Install the new setup file into the allocated fd. */
        fd_install(fd, file);
@@ -777,14 +778,13 @@ eexit_1:
 }
-static int ep_file_init(struct file *file)
+static int ep_alloc(struct eventpoll **pep)
 {
-        struct eventpoll *ep;
+        struct eventpoll *ep = kzalloc(sizeof(*ep), GFP_KERNEL);
-        if (!(ep = kmalloc(sizeof(struct eventpoll), GFP_KERNEL)))
+        if (!ep)
                return -ENOMEM;
-        memset(ep, 0, sizeof(*ep));
        rwlock_init(&ep->lock);
        init_rwsem(&ep->sem);
        init_waitqueue_head(&ep->wq);
@@ -792,9 +792,9 @@ static int ep_file_init(struct file *file)
        INIT_LIST_HEAD(&ep->rdllist);
        ep->rbr = RB_ROOT;
-        file->private_data = ep;
+        *pep = ep;
-        DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_file_init() ep=%p\n",
+        DNPRINTK(3, (KERN_INFO "[%p] eventpoll: ep_alloc() ep=%p\n",
                     current, ep));
        return 0;
 }
diff --git a/fs/exec.c b/fs/exec.c
index 14dd03907ccb..a04a575ad433 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -421,11 +421,6 @@ int setup_arg_pages(struct linux_binprm *bprm,
        if (!mpnt)
                return -ENOMEM;
-        if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) {
-                kmem_cache_free(vm_area_cachep, mpnt);
-                return -ENOMEM;
-        }
        memset(mpnt, 0, sizeof(*mpnt));
        down_write(&mm->mmap_sem);
@@ -745,8 +740,8 @@ static inline int de_thread(struct task_struct *tsk)
        }
        /*
-         * Now there are really no other threads at all,
+         * There may be one thread left which is just exiting,
-         * so it's safe to stop telling them to kill themselves.
+         * but it's safe to stop telling the group to kill themselves.
         */
        sig->flags = 0;
@@ -785,7 +780,6 @@ no_thread_group:
                        kmem_cache_free(sighand_cachep, oldsighand);
        }
-        BUG_ON(!thread_group_empty(current));
        BUG_ON(!thread_group_leader(current));
        return 0;
 }
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 62ffa9139400..7134403d5be2 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -12,39 +12,6 @@
 #include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
-static ssize_t fat_file_aio_write(struct kiocb *iocb, const char __user *buf,
-                                  size_t count, loff_t pos)
-{
-        struct inode *inode = iocb->ki_filp->f_dentry->d_inode;
-        int retval;
-        retval = generic_file_aio_write(iocb, buf, count, pos);
-        if (retval > 0) {
-                inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
-                MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
-                mark_inode_dirty(inode);
-//              check the locking rules
-//              if (IS_SYNC(inode))
-//                      fat_sync_inode(inode);
-        }
-        return retval;
-}
-static ssize_t fat_file_writev(struct file *filp, const struct iovec *iov,
-                               unsigned long nr_segs, loff_t *ppos)
-{
-        struct inode *inode = filp->f_dentry->d_inode;
-        int retval;
-        retval = generic_file_writev(filp, iov, nr_segs, ppos);
-        if (retval > 0) {
-                inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
-                MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
-                mark_inode_dirty(inode);
-        }
-        return retval;
-}
 int fat_generic_ioctl(struct inode *inode, struct file *filp,
                      unsigned int cmd, unsigned long arg)
 {
@@ -148,9 +115,9 @@ struct file_operations fat_file_operations = {
        .read           = do_sync_read,
        .write          = do_sync_write,
        .readv          = generic_file_readv,
-        .writev         = fat_file_writev,
+        .writev         = generic_file_writev,
        .aio_read       = generic_file_aio_read,
-        .aio_write      = fat_file_aio_write,
+        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .ioctl          = fat_generic_ioctl,
        .fsync          = file_fsync,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a7cbe68e2259..e2effe2dc9b2 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -102,6 +102,19 @@ static int fat_prepare_write(struct file *file, struct page *page,
                                  &MSDOS_I(page->mapping->host)->mmu_private);
 }
+static int fat_commit_write(struct file *file, struct page *page,
+                            unsigned from, unsigned to)
+{
+        struct inode *inode = page->mapping->host;
+        int err = generic_commit_write(file, page, from, to);
+        if (!err && !(MSDOS_I(inode)->i_attrs & ATTR_ARCH)) {
+                inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
+                MSDOS_I(inode)->i_attrs |= ATTR_ARCH;
+                mark_inode_dirty(inode);
+        }
+        return err;
+}
 static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
 {
        return generic_block_bmap(mapping, block, fat_get_block);
@@ -112,7 +125,7 @@ static struct address_space_operations fat_aops = {
        .writepage      = fat_writepage,
        .sync_page      = block_sync_page,
        .prepare_write  = fat_prepare_write,
-        .commit_write   = generic_commit_write,
+        .commit_write   = fat_commit_write,
        .bmap           = _fat_bmap
 };
@@ -287,9 +300,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
        inode->i_blksize = sbi->cluster_size;
        inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
                           & ~((loff_t)sbi->cluster_size - 1)) >> 9;
-        inode->i_mtime.tv_sec = inode->i_atime.tv_sec =
+        inode->i_mtime.tv_sec =
                date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date));
-        inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = 0;
+        inode->i_mtime.tv_nsec = 0;
        if (sbi->options.isvfat) {
                int secs = de->ctime_cs / 100;
                int csecs = de->ctime_cs % 100;
@@ -297,8 +310,11 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
                        date_dos2unix(le16_to_cpu(de->ctime),
                                      le16_to_cpu(de->cdate)) + secs;
                inode->i_ctime.tv_nsec = csecs * 10000000;
+                inode->i_atime.tv_sec =
+                        date_dos2unix(le16_to_cpu(0), le16_to_cpu(de->adate));
+                inode->i_atime.tv_nsec = 0;
        } else
-                inode->i_ctime = inode->i_mtime;
+                inode->i_ctime = inode->i_atime = inode->i_mtime;
        return 0;
 }
@@ -500,7 +516,9 @@ retry:
        raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
        fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date);
        if (sbi->options.isvfat) {
+                __le16 atime;
                fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate);
+                fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate);
                raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
                        inode->i_ctime.tv_nsec / 10000000;
        }
diff --git a/fs/file.c b/fs/file.c
index 2127a7b9dc3a..fd066b261c75 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -69,13 +69,9 @@ void free_fd_array(struct file **array, int num)
 static void __free_fdtable(struct fdtable *fdt)
 {
-        int fdset_size, fdarray_size;
+        free_fdset(fdt->open_fds, fdt->max_fdset);
+        free_fdset(fdt->close_on_exec, fdt->max_fdset);
-        fdset_size = fdt->max_fdset / 8;
+        free_fd_array(fdt->fd, fdt->max_fds);
-        fdarray_size = fdt->max_fds * sizeof(struct file *);
-        free_fdset(fdt->open_fds, fdset_size);
-        free_fdset(fdt->close_on_exec, fdset_size);
-        free_fd_array(fdt->fd, fdarray_size);
        kfree(fdt);
 }
diff --git a/fs/locks.c b/fs/locks.c
index c2c09b4798d6..f7daa5f48949 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -124,6 +124,7 @@
 #include <linux/smp_lock.h>
 #include <linux/syscalls.h>
 #include <linux/time.h>
+#include <linux/rcupdate.h>
 #include <asm/semaphore.h>
 #include <asm/uaccess.h>
@@ -2205,6 +2206,7 @@ void steal_locks(fl_owner_t from)
        lock_kernel();
        j = 0;
+        rcu_read_lock();
        fdt = files_fdtable(files);
        for (;;) {
                unsigned long set;
@@ -2222,6 +2224,7 @@ void steal_locks(fl_owner_t from)
                        set >>= 1;
                }
        }
+        rcu_read_unlock();
        unlock_kernel();
 }
 EXPORT_SYMBOL(steal_locks);
diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog
index 49eafbdb15c1..c7e9237379c2 100644
--- a/fs/ntfs/ChangeLog
+++ b/fs/ntfs/ChangeLog
@@ -92,6 +92,8 @@ ToDo/Notes:
          an octal number to conform to how chmod(1) works, too.  Thanks to
          Giuseppe Bilotta and Horst von Brand for pointing out the errors of
          my ways.
+        - Fix various bugs in the runlist merging code.  (Based on libntfs
+          changes by Richard Russon.)
 2.1.23 - Implement extension of resident files and make writing safe as well as
         many bug fixes, cleanups, and enhancements...
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index b6cc8cf24626..5e80c07c6a4d 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -59,39 +59,49 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
        unsigned long flags;
        struct buffer_head *first, *tmp;
        struct page *page;
+        struct inode *vi;
        ntfs_inode *ni;
        int page_uptodate = 1;
        page = bh->b_page;
-        ni = NTFS_I(page->mapping->host);
+        vi = page->mapping->host;
+        ni = NTFS_I(vi);
        if (likely(uptodate)) {
-                s64 file_ofs, initialized_size;
+                loff_t i_size;
+                s64 file_ofs, init_size;
                set_buffer_uptodate(bh);
                file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
                                bh_offset(bh);
                read_lock_irqsave(&ni->size_lock, flags);
-                initialized_size = ni->initialized_size;
+                init_size = ni->initialized_size;
+                i_size = i_size_read(vi);
                read_unlock_irqrestore(&ni->size_lock, flags);
+                if (unlikely(init_size > i_size)) {
+                        /* Race with shrinking truncate. */
+                        init_size = i_size;
+                }
                /* Check for the current buffer head overflowing. */
-                if (file_ofs + bh->b_size > initialized_size) {
+                if (unlikely(file_ofs + bh->b_size > init_size)) {
-                        char *addr;
+                        u8 *kaddr;
-                        int ofs = 0;
+                        int ofs;
-                        if (file_ofs < initialized_size)
+                        ofs = 0;
-                                ofs = initialized_size - file_ofs;
+                        if (file_ofs < init_size)
-                        addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+                                ofs = init_size - file_ofs;
-                        memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs);
+                        kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+                        memset(kaddr + bh_offset(bh) + ofs, 0,
+                                        bh->b_size - ofs);
+                        kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
                        flush_dcache_page(page);
-                        kunmap_atomic(addr, KM_BIO_SRC_IRQ);
                }
        } else {
                clear_buffer_uptodate(bh);
                SetPageError(page);
-                ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.",
+                ntfs_error(ni->vol->sb, "Buffer I/O error, logical block "
-                                (unsigned long long)bh->b_blocknr);
+                                "0x%llx.", (unsigned long long)bh->b_blocknr);
        }
        first = page_buffers(page);
        local_irq_save(flags);
@@ -124,7 +134,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
                if (likely(page_uptodate && !PageError(page)))
                        SetPageUptodate(page);
        } else {
-                char *addr;
+                u8 *kaddr;
                unsigned int i, recs;
                u32 rec_size;
@@ -132,12 +142,12 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
                recs = PAGE_CACHE_SIZE / rec_size;
                /* Should have been verified before we got here... */
                BUG_ON(!recs);
-                addr = kmap_atomic(page, KM_BIO_SRC_IRQ);
+                kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ);
                for (i = 0; i < recs; i++)
-                        post_read_mst_fixup((NTFS_RECORD*)(addr +
+                        post_read_mst_fixup((NTFS_RECORD*)(kaddr +
                                        i * rec_size), rec_size);
+                kunmap_atomic(kaddr, KM_BIO_SRC_IRQ);
                flush_dcache_page(page);
-                kunmap_atomic(addr, KM_BIO_SRC_IRQ);
                if (likely(page_uptodate && !PageError(page)))
                        SetPageUptodate(page);
        }
@@ -168,8 +178,11 @@ still_busy:
 */
 static int ntfs_read_block(struct page *page)
 {
+        loff_t i_size;
        VCN vcn;
        LCN lcn;
+        s64 init_size;
+        struct inode *vi;
        ntfs_inode *ni;
        ntfs_volume *vol;
        runlist_element *rl;
@@ -180,7 +193,8 @@ static int ntfs_read_block(struct page *page)
        int i, nr;
        unsigned char blocksize_bits;
-        ni = NTFS_I(page->mapping->host);
+        vi = page->mapping->host;
+        ni = NTFS_I(vi);
        vol = ni->vol;
        /* $MFT/$DATA must have its complete runlist in memory at all times. */
@@ -199,11 +213,28 @@ static int ntfs_read_block(struct page *page)
        bh = head = page_buffers(page);
        BUG_ON(!bh);
+        /*
+         * We may be racing with truncate.  To avoid some of the problems we
+         * now take a snapshot of the various sizes and use those for the whole
+         * of the function.  In case of an extending truncate it just means we
+         * may leave some buffers unmapped which are now allocated.  This is
+         * not a problem since these buffers will just get mapped when a write
+         * occurs.  In case of a shrinking truncate, we will detect this later
+         * on due to the runlist being incomplete and if the page is being
+         * fully truncated, truncate will throw it away as soon as we unlock
+         * it so no need to worry what we do with it.
+         */
        iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
        read_lock_irqsave(&ni->size_lock, flags);
        lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
-        zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits;
+        init_size = ni->initialized_size;
+        i_size = i_size_read(vi);
        read_unlock_irqrestore(&ni->size_lock, flags);
+        if (unlikely(init_size > i_size)) {
+                /* Race with shrinking truncate. */
+                init_size = i_size;
+        }
+        zblock = (init_size + blocksize - 1) >> blocksize_bits;
        /* Loop through all the buffers in the page. */
        rl = NULL;
@@ -366,6 +397,8 @@ handle_zblock:
 */
 static int ntfs_readpage(struct file *file, struct page *page)
 {
+        loff_t i_size;
+        struct inode *vi;
        ntfs_inode *ni, *base_ni;
        u8 *kaddr;
        ntfs_attr_search_ctx *ctx;
@@ -384,14 +417,17 @@ retry_readpage:
                unlock_page(page);
                return 0;
        }
-        ni = NTFS_I(page->mapping->host);
+        vi = page->mapping->host;
+        ni = NTFS_I(vi);
        /*
         * Only $DATA attributes can be encrypted and only unnamed $DATA
         * attributes can be compressed.  Index root can have the flags set but
         * this means to create compressed/encrypted files, not that the
-         * attribute is compressed/encrypted.
+         * attribute is compressed/encrypted.  Note we need to check for
+         * AT_INDEX_ALLOCATION since this is the type of both directory and
+         * index inodes.
         */
-        if (ni->type != AT_INDEX_ROOT) {
+        if (ni->type != AT_INDEX_ALLOCATION) {
                /* If attribute is encrypted, deny access, just like NT4. */
                if (NInoEncrypted(ni)) {
                        BUG_ON(ni->type != AT_DATA);
@@ -456,7 +492,12 @@ retry_readpage:
        read_lock_irqsave(&ni->size_lock, flags);
        if (unlikely(attr_len > ni->initialized_size))
                attr_len = ni->initialized_size;
+        i_size = i_size_read(vi);
        read_unlock_irqrestore(&ni->size_lock, flags);
+        if (unlikely(attr_len > i_size)) {
+                /* Race with shrinking truncate. */
+                attr_len = i_size;
+        }
        kaddr = kmap_atomic(page, KM_USER0);
        /* Copy the data to the page. */
        memcpy(kaddr, (u8*)ctx->attr +
@@ -1341,9 +1382,11 @@ retry_writepage:
         * Only $DATA attributes can be encrypted and only unnamed $DATA
         * attributes can be compressed.  Index root can have the flags set but
         * this means to create compressed/encrypted files, not that the
-         * attribute is compressed/encrypted.
+         * attribute is compressed/encrypted.  Note we need to check for
+         * AT_INDEX_ALLOCATION since this is the type of both directory and
+         * index inodes.
         */
-        if (ni->type != AT_INDEX_ROOT) {
+        if (ni->type != AT_INDEX_ALLOCATION) {
                /* If file is encrypted, deny access, just like NT4. */
                if (NInoEncrypted(ni)) {
                        unlock_page(page);
@@ -1379,8 +1422,8 @@ retry_writepage:
                        unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
                        kaddr = kmap_atomic(page, KM_USER0);
                        memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs);
-                        flush_dcache_page(page);
                        kunmap_atomic(kaddr, KM_USER0);
+                        flush_dcache_page(page);
                }
                /* Handle mst protected attributes. */
                if (NInoMstProtected(ni))
@@ -1443,34 +1486,33 @@ retry_writepage:
        BUG_ON(PageWriteback(page));
        set_page_writeback(page);
        unlock_page(page);
-        /*
-         * Here, we do not need to zero the out of bounds area everytime
-         * because the below memcpy() already takes care of the
-         * mmap-at-end-of-file requirements.  If the file is converted to a
-         * non-resident one, then the code path use is switched to the
-         * non-resident one where the zeroing happens on each ntfs_writepage()
-         * invocation.
-         */
        attr_len = le32_to_cpu(ctx->attr->data.resident.value_length);
        i_size = i_size_read(vi);
        if (unlikely(attr_len > i_size)) {
+                /* Race with shrinking truncate or a failed truncate. */
                attr_len = i_size;
-                ctx->attr->data.resident.value_length = cpu_to_le32(attr_len);
+                /*
+                 * If the truncate failed, fix it up now.  If it is a concurrent
+                 * truncate, we do its job, so it does not have to do anything.
+                 */
+                err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr,
+                                attr_len);
+                /* Shrinking cannot fail. */
+                BUG_ON(err);
        }
        kaddr = kmap_atomic(page, KM_USER0);
        /* Copy the data from the page to the mft record. */
        memcpy((u8*)ctx->attr +
                        le16_to_cpu(ctx->attr->data.resident.value_offset),
                        kaddr, attr_len);
-        flush_dcache_mft_record_page(ctx->ntfs_ino);
        /* Zero out of bounds area in the page cache page. */
        memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
-        flush_dcache_page(page);
        kunmap_atomic(kaddr, KM_USER0);
+        flush_dcache_mft_record_page(ctx->ntfs_ino);
+        flush_dcache_page(page);
+        /* We are done with the page. */
        end_page_writeback(page);
+        /* Finally, mark the mft record dirty, so it gets written back. */
-        /* Mark the mft record dirty, so it gets written back. */
        mark_mft_record_dirty(ctx->ntfs_ino);
        ntfs_attr_put_search_ctx(ctx);
        unmap_mft_record(base_ni);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index dc4bbe3acf5c..7ec045131808 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -1166,6 +1166,8 @@ err_out:
 *
 * Return 0 on success and -errno on error.  In the error case, the inode will
 * have had make_bad_inode() executed on it.
+ *
+ * Note this cannot be called for AT_INDEX_ALLOCATION.
 */
 static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
 {
@@ -1242,8 +1244,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
                        }
                }
                /*
-                 * The encryption flag set in an index root just means to
+                 * The compressed/sparse flag set in an index root just means
-                 * compress all files.
+                 * to compress all files.
                 */
                if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
                        ntfs_error(vi->i_sb, "Found mst protected attribute "
@@ -1319,8 +1321,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
                                        "the mapping pairs array.");
                        goto unm_err_out;
                }
-                if ((NInoCompressed(ni) || NInoSparse(ni)) &&
+                if (NInoCompressed(ni) || NInoSparse(ni)) {
-                                ni->type != AT_INDEX_ROOT) {
                        if (a->data.non_resident.compression_unit != 4) {
                                ntfs_error(vi->i_sb, "Found nonstandard "
                                                "compression unit (%u instead "
diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h
index 3288bcc2c4aa..006946efca8c 100644
--- a/fs/ntfs/malloc.h
+++ b/fs/ntfs/malloc.h
@@ -1,7 +1,7 @@
 /*
 * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project.
 *
- * Copyright (c) 2001-2004 Anton Altaparmakov
+ * Copyright (c) 2001-2005 Anton Altaparmakov
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c
index f5b2ac929081..061b5ff6b73c 100644
--- a/fs/ntfs/runlist.c
+++ b/fs/ntfs/runlist.c
@@ -2,7 +2,7 @@
 * runlist.c - NTFS runlist handling code.  Part of the Linux-NTFS project.
 *
 * Copyright (c) 2001-2005 Anton Altaparmakov
- * Copyright (c) 2002 Richard Russon
+ * Copyright (c) 2002-2005 Richard Russon
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
@@ -158,17 +158,21 @@ static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst,
        BUG_ON(!dst);
        BUG_ON(!src);
-        if ((dst->lcn < 0) || (src->lcn < 0)) {   /* Are we merging holes? */
+        /* We can merge unmapped regions even if they are misaligned. */
-                if (dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE)
+        if ((dst->lcn == LCN_RL_NOT_MAPPED) && (src->lcn == LCN_RL_NOT_MAPPED))
-                        return TRUE;
+                return TRUE;
+        /* If the runs are misaligned, we cannot merge them. */
+        if ((dst->vcn + dst->length) != src->vcn)
                return FALSE;
-        }
+        /* If both runs are non-sparse and contiguous, we can merge them. */
-        if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */
+        if ((dst->lcn >= 0) && (src->lcn >= 0) &&
-                return FALSE;
+                        ((dst->lcn + dst->length) == src->lcn))
-        if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */
+                return TRUE;
-                return FALSE;
+        /* If both runs are holes, we can merge them. */
+        if ((dst->lcn == LCN_HOLE) && (src->lcn == LCN_HOLE))
-        return TRUE;
+                return TRUE;
+        /* Cannot merge. */
+        return FALSE;
 }
 /**
@@ -214,14 +218,15 @@ static inline void __ntfs_rl_merge(runlist_element *dst, runlist_element *src)
 static inline runlist_element *ntfs_rl_append(runlist_element *dst,
                int dsize, runlist_element *src, int ssize, int loc)
 {
-        BOOL right;
+        BOOL right = FALSE;     /* Right end of @src needs merging. */
-        int magic;
+        int marker;             /* End of the inserted runs. */
        BUG_ON(!dst);
        BUG_ON(!src);
        /* First, check if the right hand end needs merging. */
-        right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+        if ((loc + 1) < dsize)
+                right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
        /* Space required: @dst size + @src size, less one if we merged. */
        dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right);
@@ -236,18 +241,19 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst,
        if (right)
                __ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
-        magic = loc + ssize;
+        /* First run after the @src runs that have been inserted. */
+        marker = loc + ssize + 1;
        /* Move the tail of @dst out of the way, then copy in @src. */
-        ntfs_rl_mm(dst, magic + 1, loc + 1 + right, dsize - loc - 1 - right);
+        ntfs_rl_mm(dst, marker, loc + 1 + right, dsize - (loc + 1 + right));
        ntfs_rl_mc(dst, loc + 1, src, 0, ssize);
        /* Adjust the size of the preceding hole. */
        dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
        /* We may have changed the length of the file, so fix the end marker */
-        if (dst[magic + 1].lcn == LCN_ENOENT)
+        if (dst[marker].lcn == LCN_ENOENT)
-                dst[magic + 1].vcn = dst[magic].vcn + dst[magic].length;
+                dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
        return dst;
 }
@@ -279,18 +285,17 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst,
 static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
                int dsize, runlist_element *src, int ssize, int loc)
 {
-        BOOL left = FALSE;
+        BOOL left = FALSE;      /* Left end of @src needs merging. */
-        BOOL disc = FALSE;      /* Discontinuity */
+        BOOL disc = FALSE;      /* Discontinuity between @dst and @src. */
-        BOOL hole = FALSE;      /* Following a hole */
+        int marker;             /* End of the inserted runs. */
-        int magic;
        BUG_ON(!dst);
        BUG_ON(!src);
-        /* disc => Discontinuity between the end of @dst and the start of @src.
+        /*
-         *         This means we might need to insert a hole.
+         * disc => Discontinuity between the end of @dst and the start of @src.
-         * hole => @dst ends with a hole or an unmapped region which we can
+         *         This means we might need to insert a "not mapped" run.
-         *         extend to match the discontinuity. */
+         */
        if (loc == 0)
                disc = (src[0].vcn > 0);
        else {
@@ -303,58 +308,49 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
                        merged_length += src->length;
                disc = (src[0].vcn > dst[loc - 1].vcn + merged_length);
-                if (disc)
-                        hole = (dst[loc - 1].lcn == LCN_HOLE);
        }
+        /*
-        /* Space required: @dst size + @src size, less one if we merged, plus
+         * Space required: @dst size + @src size, less one if we merged, plus
-         * one if there was a discontinuity, less one for a trailing hole. */
+         * one if there was a discontinuity.
-        dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc - hole);
+         */
+        dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc);
        if (IS_ERR(dst))
                return dst;
        /*
         * We are guaranteed to succeed from here so can start modifying the
         * original runlist.
         */
        if (left)
                __ntfs_rl_merge(dst + loc - 1, src);
+        /*
-        magic = loc + ssize - left + disc - hole;
+         * First run after the @src runs that have been inserted.
+         * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+         * runs in @src.  However, if @left, then the first run in @src has
+         * been merged with one in @dst.  And if @disc, then @dst and @src do
+         * not meet and we need an extra run to fill the gap.
+         */
+        marker = loc + ssize - left + disc;
        /* Move the tail of @dst out of the way, then copy in @src. */
-        ntfs_rl_mm(dst, magic, loc, dsize - loc);
+        ntfs_rl_mm(dst, marker, loc, dsize - loc);
-        ntfs_rl_mc(dst, loc + disc - hole, src, left, ssize - left);
+        ntfs_rl_mc(dst, loc + disc, src, left, ssize - left);
-        /* Adjust the VCN of the last run ... */
+        /* Adjust the VCN of the first run after the insertion... */
-        if (dst[magic].lcn <= LCN_HOLE)
+        dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
-                dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
        /* ... and the length. */
-        if (dst[magic].lcn == LCN_HOLE || dst[magic].lcn == LCN_RL_NOT_MAPPED)
+        if (dst[marker].lcn == LCN_HOLE || dst[marker].lcn == LCN_RL_NOT_MAPPED)
-                dst[magic].length = dst[magic + 1].vcn - dst[magic].vcn;
+                dst[marker].length = dst[marker + 1].vcn - dst[marker].vcn;
-        /* Writing beyond the end of the file and there's a discontinuity. */
+        /* Writing beyond the end of the file and there is a discontinuity. */
        if (disc) {
-                if (hole)
+                if (loc > 0) {
-                        dst[loc - 1].length = dst[loc].vcn - dst[loc - 1].vcn;
+                        dst[loc].vcn = dst[loc - 1].vcn + dst[loc - 1].length;
-                else {
+                        dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn;
-                        if (loc > 0) {
+                } else {
-                                dst[loc].vcn = dst[loc - 1].vcn +
+                        dst[loc].vcn = 0;
-                                                dst[loc - 1].length;
+                        dst[loc].length = dst[loc + 1].vcn;
-                                dst[loc].length = dst[loc + 1].vcn -
-                                                dst[loc].vcn;
-                        } else {
-                                dst[loc].vcn = 0;
-                                dst[loc].length = dst[loc + 1].vcn;
-                        }
-                        dst[loc].lcn = LCN_RL_NOT_MAPPED;
                }
+                dst[loc].lcn = LCN_RL_NOT_MAPPED;
-                magic += hole;
-                if (dst[magic].lcn == LCN_ENOENT)
-                        dst[magic].vcn = dst[magic - 1].vcn +
-                                        dst[magic - 1].length;
        }
        return dst;
 }
@@ -385,20 +381,23 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst,
 static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
                int dsize, runlist_element *src, int ssize, int loc)
 {
-        BOOL left = FALSE;
+        BOOL left = FALSE;      /* Left end of @src needs merging. */
-        BOOL right;
+        BOOL right = FALSE;     /* Right end of @src needs merging. */
-        int magic;
+        int tail;               /* Start of tail of @dst. */
+        int marker;             /* End of the inserted runs. */
        BUG_ON(!dst);
        BUG_ON(!src);
-        /* First, merge the left and right ends, if necessary. */
+        /* First, see if the left and right ends need merging. */
-        right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
+        if ((loc + 1) < dsize)
+                right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1);
        if (loc > 0)
                left = ntfs_are_rl_mergeable(dst + loc - 1, src);
+        /*
-        /* Allocate some space. We'll need less if the left, right, or both
+         * Allocate some space.  We will need less if the left, right, or both
-         * ends were merged. */
+         * ends get merged.
+         */
        dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right);
        if (IS_ERR(dst))
                return dst;
@@ -406,21 +405,37 @@ static inline runlist_element *ntfs_rl_replace(runlist_element *dst,
         * We are guaranteed to succeed from here so can start modifying the
         * original runlists.
         */
+        /* First, merge the left and right ends, if necessary. */
        if (right)
                __ntfs_rl_merge(src + ssize - 1, dst + loc + 1);
        if (left)
                __ntfs_rl_merge(dst + loc - 1, src);
+        /*
-        /* FIXME: What does this mean? (AIA) */
+         * Offset of the tail of @dst.  This needs to be moved out of the way
-        magic = loc + ssize - left;
+         * to make space for the runs to be copied from @src; @tail indexes the
+         * first run of that tail.
+         * Nominally, @tail equals @loc + 1, i.e. location, skipping the
+         * replaced run.  However, if @right, then one of @dst's runs is
+         * already merged into @src.
+         */
+        tail = loc + right + 1;
+        /*
+         * First run after the @src runs that have been inserted, i.e. where
+         * the tail of @dst needs to be moved to.
+         * Nominally, @marker equals @loc + @ssize, i.e. location + number of
+         * runs in @src.  However, if @left, then the first run in @src has
+         * been merged with one in @dst.
+         */
+        marker = loc + ssize - left;
        /* Move the tail of @dst out of the way, then copy in @src. */
-        ntfs_rl_mm(dst, magic, loc + right + 1, dsize - loc - right - 1);
+        ntfs_rl_mm(dst, marker, tail, dsize - tail);
        ntfs_rl_mc(dst, loc, src, left, ssize - left);
-        /* We may have changed the length of the file, so fix the end marker */
+        /* We may have changed the length of the file, so fix the end marker. */
-        if (dst[magic].lcn == LCN_ENOENT)
+        if (dsize - tail > 0 && dst[marker].lcn == LCN_ENOENT)
-                dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length;
+                dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length;
        return dst;
 }
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d88d518d30f6..d84eecacbeaf 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -74,6 +74,7 @@
 #include <linux/file.h>
 #include <linux/times.h>
 #include <linux/cpuset.h>
+#include <linux/rcupdate.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -180,12 +181,14 @@ static inline char * task_state(struct task_struct *p, char *buffer)
                p->gid, p->egid, p->sgid, p->fsgid);
        read_unlock(&tasklist_lock);
        task_lock(p);
+        rcu_read_lock();
        if (p->files)
                fdt = files_fdtable(p->files);
        buffer += sprintf(buffer,
                "FDSize:\t%d\n"
                "Groups:\t",
                fdt ? fdt->max_fds : 0);
+        rcu_read_unlock();
        group_info = p->group_info;
        get_group_info(group_info);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 23db452ab428..fb34f88a4a74 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -340,6 +340,52 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf
        return result;
 }
+/* Same as proc_root_link, but this additionally tries to get fs from other
+ * threads in the group */
+static int proc_task_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
+{
+        struct fs_struct *fs;
+        int result = -ENOENT;
+        struct task_struct *leader = proc_task(inode);
+        task_lock(leader);
+        fs = leader->fs;
+        if (fs) {
+                atomic_inc(&fs->count);
+                task_unlock(leader);
+        } else {
+                /* Try to get fs from other threads */
+                task_unlock(leader);
+                struct task_struct *task = leader;
+                read_lock(&tasklist_lock);
+                if (pid_alive(task)) {
+                        while ((task = next_thread(task)) != leader) {
+                                task_lock(task);
+                                fs = task->fs;
+                                if (fs) {
+                                        atomic_inc(&fs->count);
+                                        task_unlock(task);
+                                        break;
+                                }
+                                task_unlock(task);
+                        }
+                }
+                read_unlock(&tasklist_lock);
+        }
+        if (fs) {
+                read_lock(&fs->lock);
+                *mnt = mntget(fs->rootmnt);
+                *dentry = dget(fs->root);
+                read_unlock(&fs->lock);
+                result = 0;
+                put_fs_struct(fs);
+        }
+        return result;
+}
 #define MAY_PTRACE(task) \
        (task == current || \
        (task->parent == current && \
@@ -471,14 +517,14 @@ static int proc_oom_score(struct task_struct *task, char *buffer)
 /* permission checks */
-static int proc_check_root(struct inode *inode)
+/* If the process being read is separated by chroot from the reading process,
+ * don't let the reader access the threads.
+ */
+static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt)
 {
-        struct dentry *de, *base, *root;
+        struct dentry *de, *base;
-        struct vfsmount *our_vfsmnt, *vfsmnt, *mnt;
+        struct vfsmount *our_vfsmnt, *mnt;
        int res = 0;
-        if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
-                return -ENOENT;
        read_lock(&current->fs->lock);
        our_vfsmnt = mntget(current->fs->rootmnt);
        base = dget(current->fs->root);
@@ -511,6 +557,16 @@ out:
        goto exit;
 }
+static int proc_check_root(struct inode *inode)
+{
+        struct dentry *root;
+        struct vfsmount *vfsmnt;
+        if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
+                return -ENOENT;
+        return proc_check_chroot(root, vfsmnt);
+}
 static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
        if (generic_permission(inode, mask, NULL) != 0)
@@ -518,6 +574,20 @@ static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
        return proc_check_root(inode);
 }
+static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+        struct dentry *root;
+        struct vfsmount *vfsmnt;
+        if (generic_permission(inode, mask, NULL) != 0)
+                return -EACCES;
+        if (proc_task_root_link(inode, &root, &vfsmnt))
+                return -ENOENT;
+        return proc_check_chroot(root, vfsmnt);
+}
 extern struct seq_operations proc_pid_maps_op;
 static int maps_open(struct inode *inode, struct file *file)
 {
@@ -1419,7 +1489,7 @@ static struct inode_operations proc_fd_inode_operations = {
 static struct inode_operations proc_task_inode_operations = {
        .lookup         = proc_task_lookup,
-        .permission     = proc_permission,
+        .permission     = proc_task_permission,
 };
 #ifdef CONFIG_SECURITY