author     Steven Whitehouse <swhiteho@redhat.com>   2006-10-02 08:45:08 -0400
committer  Steven Whitehouse <swhiteho@redhat.com>   2006-10-02 08:45:08 -0400
commit     59458f40e25915a355d8b1d701425fe9f4f9ea23
tree       f1c9a2934df686e36d75f759ab7313b6f0e0e5f9 /mm
parent     825f9075d74028d11d7f5932f04e1b5db3022b51
parent     d834c16516d1ebec4766fc58c059bf01311e6045

Merge branch 'master' into gfs2
Diffstat (limited to 'mm')
 -rw-r--r--  mm/Kconfig            |   7
 -rw-r--r--  mm/Makefile           |   4
 -rw-r--r--  mm/bounce.c           | 302
 -rw-r--r--  mm/filemap.c          | 186
 -rw-r--r--  mm/fremap.c           |   2
 -rw-r--r--  mm/highmem.c          | 281
 -rw-r--r--  mm/memory.c           |  21
 -rw-r--r--  mm/memory_hotplug.c   |  71
 -rw-r--r--  mm/mempolicy.c        |   3
 -rw-r--r--  mm/migrate.c          |   4
 -rw-r--r--  mm/mprotect.c         |   2
 -rw-r--r--  mm/mremap.c           |   2
 -rw-r--r--  mm/nommu.c            |   3
 -rw-r--r--  mm/oom_kill.c         |  53
 -rw-r--r--  mm/page-writeback.c   | 160
 -rw-r--r--  mm/shmem.c            | 117
 -rw-r--r--  mm/shmem_acl.c        | 197
 -rw-r--r--  mm/slab.c             |  24
 -rw-r--r--  mm/swapfile.c         |   7
 -rw-r--r--  mm/truncate.c         |  60
 -rw-r--r--  mm/util.c             |  18
 21 files changed, 1014 insertions, 510 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 8f5b45615f7b..5d88489ef2de 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -115,12 +115,17 @@ config SPARSEMEM_EXTREME
115# eventually, we can have this option just 'select SPARSEMEM' 115# eventually, we can have this option just 'select SPARSEMEM'
116config MEMORY_HOTPLUG 116config MEMORY_HOTPLUG
117 bool "Allow for memory hot-add" 117 bool "Allow for memory hot-add"
118 depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG 118 depends on SPARSEMEM || X86_64_ACPI_NUMA
119 depends on HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
119 depends on (IA64 || X86 || PPC64) 120 depends on (IA64 || X86 || PPC64)
120 121
121comment "Memory hotplug is currently incompatible with Software Suspend" 122comment "Memory hotplug is currently incompatible with Software Suspend"
122 depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND 123 depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
123 124
125config MEMORY_HOTPLUG_SPARSE
126 def_bool y
127 depends on SPARSEMEM && MEMORY_HOTPLUG
128
124# Heavily threaded applications may benefit from splitting the mm-wide 129# Heavily threaded applications may benefit from splitting the mm-wide
125# page_table_lock, so that faults on different parts of the user address 130# page_table_lock, so that faults on different parts of the user address
126# space can be handled with less contention: split it at this NR_CPUS. 131# space can be handled with less contention: split it at this NR_CPUS.
diff --git a/mm/Makefile b/mm/Makefile
index 60c56c0b5e10..12b3a4eee88d 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -12,11 +12,15 @@ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
12 readahead.o swap.o truncate.o vmscan.o \ 12 readahead.o swap.o truncate.o vmscan.o \
13 prio_tree.o util.o mmzone.o vmstat.o $(mmu-y) 13 prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
14 14
15ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy)
16obj-y += bounce.o
17endif
15obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o 18obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o thrash.o
16obj-$(CONFIG_HUGETLBFS) += hugetlb.o 19obj-$(CONFIG_HUGETLBFS) += hugetlb.o
17obj-$(CONFIG_NUMA) += mempolicy.o 20obj-$(CONFIG_NUMA) += mempolicy.o
18obj-$(CONFIG_SPARSEMEM) += sparse.o 21obj-$(CONFIG_SPARSEMEM) += sparse.o
19obj-$(CONFIG_SHMEM) += shmem.o 22obj-$(CONFIG_SHMEM) += shmem.o
23obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
20obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o 24obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
21obj-$(CONFIG_SLOB) += slob.o 25obj-$(CONFIG_SLOB) += slob.o
22obj-$(CONFIG_SLAB) += slab.o 26obj-$(CONFIG_SLAB) += slab.o
diff --git a/mm/bounce.c b/mm/bounce.c
new file mode 100644
index 000000000000..e4b62d2a4024
--- /dev/null
+++ b/mm/bounce.c
@@ -0,0 +1,302 @@
1/* bounce buffer handling for block devices
2 *
3 * - Split from highmem.c
4 */
5
6#include <linux/mm.h>
7#include <linux/module.h>
8#include <linux/swap.h>
9#include <linux/bio.h>
10#include <linux/pagemap.h>
11#include <linux/mempool.h>
12#include <linux/blkdev.h>
13#include <linux/init.h>
14#include <linux/hash.h>
15#include <linux/highmem.h>
16#include <linux/blktrace_api.h>
17#include <asm/tlbflush.h>
18
19#define POOL_SIZE 64
20#define ISA_POOL_SIZE 16
21
22static mempool_t *page_pool, *isa_page_pool;
23
24#ifdef CONFIG_HIGHMEM
25static __init int init_emergency_pool(void)
26{
27 struct sysinfo i;
28 si_meminfo(&i);
29 si_swapinfo(&i);
30
31 if (!i.totalhigh)
32 return 0;
33
34 page_pool = mempool_create_page_pool(POOL_SIZE, 0);
35 BUG_ON(!page_pool);
36 printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
37
38 return 0;
39}
40
41__initcall(init_emergency_pool);
42
43/*
44 * highmem version, map in to vec
45 */
46static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
47{
48 unsigned long flags;
49 unsigned char *vto;
50
51 local_irq_save(flags);
52 vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
53 memcpy(vto + to->bv_offset, vfrom, to->bv_len);
54 kunmap_atomic(vto, KM_BOUNCE_READ);
55 local_irq_restore(flags);
56}
57
58#else /* CONFIG_HIGHMEM */
59
60#define bounce_copy_vec(to, vfrom) \
61 memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
62
63#endif /* CONFIG_HIGHMEM */
64
65/*
66 * allocate pages in the DMA region for the ISA pool
67 */
68static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
69{
70 return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
71}
72
73/*
74 * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
75 * as the max address, so check if the pool has already been created.
76 */
77int init_emergency_isa_pool(void)
78{
79 if (isa_page_pool)
80 return 0;
81
82 isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
83 mempool_free_pages, (void *) 0);
84 BUG_ON(!isa_page_pool);
85
86 printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
87 return 0;
88}
89
90/*
91 * Simple bounce buffer support for highmem pages. Depending on the
92 * queue gfp mask set, *to may or may not be a highmem page. kmap it
93 * always, it will do the Right Thing
94 */
95static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
96{
97 unsigned char *vfrom;
98 struct bio_vec *tovec, *fromvec;
99 int i;
100
101 __bio_for_each_segment(tovec, to, i, 0) {
102 fromvec = from->bi_io_vec + i;
103
104 /*
105 * not bounced
106 */
107 if (tovec->bv_page == fromvec->bv_page)
108 continue;
109
110 /*
111 * fromvec->bv_offset and fromvec->bv_len might have been
112 * modified by the block layer, so use the original copy,
113 * bounce_copy_vec already uses tovec->bv_len
114 */
115 vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
116
117 flush_dcache_page(tovec->bv_page);
118 bounce_copy_vec(tovec, vfrom);
119 }
120}
121
122static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
123{
124 struct bio *bio_orig = bio->bi_private;
125 struct bio_vec *bvec, *org_vec;
126 int i;
127
128 if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
129 set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
130
131 /*
132 * free up bounce indirect pages used
133 */
134 __bio_for_each_segment(bvec, bio, i, 0) {
135 org_vec = bio_orig->bi_io_vec + i;
136 if (bvec->bv_page == org_vec->bv_page)
137 continue;
138
139 dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
140 mempool_free(bvec->bv_page, pool);
141 }
142
143 bio_endio(bio_orig, bio_orig->bi_size, err);
144 bio_put(bio);
145}
146
147static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
148{
149 if (bio->bi_size)
150 return 1;
151
152 bounce_end_io(bio, page_pool, err);
153 return 0;
154}
155
156static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
157{
158 if (bio->bi_size)
159 return 1;
160
161 bounce_end_io(bio, isa_page_pool, err);
162 return 0;
163}
164
165static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
166{
167 struct bio *bio_orig = bio->bi_private;
168
169 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
170 copy_to_high_bio_irq(bio_orig, bio);
171
172 bounce_end_io(bio, pool, err);
173}
174
175static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
176{
177 if (bio->bi_size)
178 return 1;
179
180 __bounce_end_io_read(bio, page_pool, err);
181 return 0;
182}
183
184static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
185{
186 if (bio->bi_size)
187 return 1;
188
189 __bounce_end_io_read(bio, isa_page_pool, err);
190 return 0;
191}
192
193static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
194 mempool_t *pool)
195{
196 struct page *page;
197 struct bio *bio = NULL;
198 int i, rw = bio_data_dir(*bio_orig);
199 struct bio_vec *to, *from;
200
201 bio_for_each_segment(from, *bio_orig, i) {
202 page = from->bv_page;
203
204 /*
205 * is destination page below bounce pfn?
206 */
207 if (page_to_pfn(page) < q->bounce_pfn)
208 continue;
209
210 /*
211 * irk, bounce it
212 */
213 if (!bio)
214 bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
215
216 to = bio->bi_io_vec + i;
217
218 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
219 to->bv_len = from->bv_len;
220 to->bv_offset = from->bv_offset;
221 inc_zone_page_state(to->bv_page, NR_BOUNCE);
222
223 if (rw == WRITE) {
224 char *vto, *vfrom;
225
226 flush_dcache_page(from->bv_page);
227 vto = page_address(to->bv_page) + to->bv_offset;
228 vfrom = kmap(from->bv_page) + from->bv_offset;
229 memcpy(vto, vfrom, to->bv_len);
230 kunmap(from->bv_page);
231 }
232 }
233
234 /*
235 * no pages bounced
236 */
237 if (!bio)
238 return;
239
240 /*
241 * at least one page was bounced, fill in possible non-highmem
242 * pages
243 */
244 __bio_for_each_segment(from, *bio_orig, i, 0) {
245 to = bio_iovec_idx(bio, i);
246 if (!to->bv_page) {
247 to->bv_page = from->bv_page;
248 to->bv_len = from->bv_len;
249 to->bv_offset = from->bv_offset;
250 }
251 }
252
253 bio->bi_bdev = (*bio_orig)->bi_bdev;
254 bio->bi_flags |= (1 << BIO_BOUNCED);
255 bio->bi_sector = (*bio_orig)->bi_sector;
256 bio->bi_rw = (*bio_orig)->bi_rw;
257
258 bio->bi_vcnt = (*bio_orig)->bi_vcnt;
259 bio->bi_idx = (*bio_orig)->bi_idx;
260 bio->bi_size = (*bio_orig)->bi_size;
261
262 if (pool == page_pool) {
263 bio->bi_end_io = bounce_end_io_write;
264 if (rw == READ)
265 bio->bi_end_io = bounce_end_io_read;
266 } else {
267 bio->bi_end_io = bounce_end_io_write_isa;
268 if (rw == READ)
269 bio->bi_end_io = bounce_end_io_read_isa;
270 }
271
272 bio->bi_private = *bio_orig;
273 *bio_orig = bio;
274}
275
276void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
277{
278 mempool_t *pool;
279
280 /*
281 * for non-isa bounce case, just check if the bounce pfn is equal
282 * to or bigger than the highest pfn in the system -- in that case,
283 * don't waste time iterating over bio segments
284 */
285 if (!(q->bounce_gfp & GFP_DMA)) {
286 if (q->bounce_pfn >= blk_max_pfn)
287 return;
288 pool = page_pool;
289 } else {
290 BUG_ON(!isa_page_pool);
291 pool = isa_page_pool;
292 }
293
294 blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
295
296 /*
297 * slow path
298 */
299 __blk_queue_bounce(q, bio_orig, pool);
300}
301
302EXPORT_SYMBOL(blk_queue_bounce);
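
Illustrative sketch, not part of the patch: the code above is only exercised for queues that declared a DMA limit. A driver with ISA-style addressing would typically use the existing blk_queue_bounce_limit() helper as below (the driver hook shown is hypothetical); the block layer then routes each bio through blk_queue_bounce() before the driver sees it.

#include <linux/blkdev.h>

/* hypothetical driver setup hook -- the names are placeholders */
static void exampledrv_init_queue(request_queue_t *q)
{
	/*
	 * Bounce any page above the 16MB ISA limit; with such a limit the
	 * block layer uses GFP_DMA bouncing, i.e. the isa_page_pool above.
	 */
	blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);
}
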
diff --git a/mm/filemap.c b/mm/filemap.c
index 87d4a398cd16..fef7d879ddf5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1149,13 +1149,14 @@ success:
1149 * that can use the page cache directly. 1149 * that can use the page cache directly.
1150 */ 1150 */
1151ssize_t 1151ssize_t
1152__generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, 1152generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1153 unsigned long nr_segs, loff_t *ppos) 1153 unsigned long nr_segs, loff_t pos)
1154{ 1154{
1155 struct file *filp = iocb->ki_filp; 1155 struct file *filp = iocb->ki_filp;
1156 ssize_t retval; 1156 ssize_t retval;
1157 unsigned long seg; 1157 unsigned long seg;
1158 size_t count; 1158 size_t count;
1159 loff_t *ppos = &iocb->ki_pos;
1159 1160
1160 count = 0; 1161 count = 0;
1161 for (seg = 0; seg < nr_segs; seg++) { 1162 for (seg = 0; seg < nr_segs; seg++) {
@@ -1179,7 +1180,7 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1179 1180
1180 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ 1181 /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
1181 if (filp->f_flags & O_DIRECT) { 1182 if (filp->f_flags & O_DIRECT) {
1182 loff_t pos = *ppos, size; 1183 loff_t size;
1183 struct address_space *mapping; 1184 struct address_space *mapping;
1184 struct inode *inode; 1185 struct inode *inode;
1185 1186
@@ -1225,33 +1226,8 @@ __generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1225out: 1226out:
1226 return retval; 1227 return retval;
1227} 1228}
1228EXPORT_SYMBOL(__generic_file_aio_read);
1229
1230ssize_t
1231generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos)
1232{
1233 struct iovec local_iov = { .iov_base = buf, .iov_len = count };
1234
1235 BUG_ON(iocb->ki_pos != pos);
1236 return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
1237}
1238EXPORT_SYMBOL(generic_file_aio_read); 1229EXPORT_SYMBOL(generic_file_aio_read);
1239 1230
1240ssize_t
1241generic_file_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
1242{
1243 struct iovec local_iov = { .iov_base = buf, .iov_len = count };
1244 struct kiocb kiocb;
1245 ssize_t ret;
1246
1247 init_sync_kiocb(&kiocb, filp);
1248 ret = __generic_file_aio_read(&kiocb, &local_iov, 1, ppos);
1249 if (-EIOCBQUEUED == ret)
1250 ret = wait_on_sync_kiocb(&kiocb);
1251 return ret;
1252}
1253EXPORT_SYMBOL(generic_file_read);
1254
1255int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) 1231int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
1256{ 1232{
1257 ssize_t written; 1233 ssize_t written;
@@ -1473,7 +1449,7 @@ outside_data_content:
1473 * accessible.. 1449 * accessible..
1474 */ 1450 */
1475 if (area->vm_mm == current->mm) 1451 if (area->vm_mm == current->mm)
1476 return NULL; 1452 return NOPAGE_SIGBUS;
1477 /* Fall through to the non-read-ahead case */ 1453 /* Fall through to the non-read-ahead case */
1478no_cached_page: 1454no_cached_page:
1479 /* 1455 /*
@@ -1498,7 +1474,7 @@ no_cached_page:
1498 */ 1474 */
1499 if (error == -ENOMEM) 1475 if (error == -ENOMEM)
1500 return NOPAGE_OOM; 1476 return NOPAGE_OOM;
1501 return NULL; 1477 return NOPAGE_SIGBUS;
1502 1478
1503page_not_uptodate: 1479page_not_uptodate:
1504 if (!did_readaround) { 1480 if (!did_readaround) {
@@ -1567,7 +1543,7 @@ page_not_uptodate:
1567 */ 1543 */
1568 shrink_readahead_size_eio(file, ra); 1544 shrink_readahead_size_eio(file, ra);
1569 page_cache_release(page); 1545 page_cache_release(page);
1570 return NULL; 1546 return NOPAGE_SIGBUS;
1571} 1547}
1572EXPORT_SYMBOL(filemap_nopage); 1548EXPORT_SYMBOL(filemap_nopage);
1573 1549
@@ -2022,6 +1998,7 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
2022 if (unlikely(*pos + *count > inode->i_sb->s_maxbytes)) 1998 if (unlikely(*pos + *count > inode->i_sb->s_maxbytes))
2023 *count = inode->i_sb->s_maxbytes - *pos; 1999 *count = inode->i_sb->s_maxbytes - *pos;
2024 } else { 2000 } else {
2001#ifdef CONFIG_BLOCK
2025 loff_t isize; 2002 loff_t isize;
2026 if (bdev_read_only(I_BDEV(inode))) 2003 if (bdev_read_only(I_BDEV(inode)))
2027 return -EPERM; 2004 return -EPERM;
@@ -2033,6 +2010,9 @@ inline int generic_write_checks(struct file *file, loff_t *pos, size_t *count, i
2033 2010
2034 if (*pos + *count > isize) 2011 if (*pos + *count > isize)
2035 *count = isize - *pos; 2012 *count = isize - *pos;
2013#else
2014 return -EPERM;
2015#endif
2036 } 2016 }
2037 return 0; 2017 return 0;
2038} 2018}
@@ -2313,22 +2293,22 @@ out:
2313 current->backing_dev_info = NULL; 2293 current->backing_dev_info = NULL;
2314 return written ? written : err; 2294 return written ? written : err;
2315} 2295}
2316EXPORT_SYMBOL(generic_file_aio_write_nolock);
2317 2296
2318ssize_t 2297ssize_t generic_file_aio_write_nolock(struct kiocb *iocb,
2319generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, 2298 const struct iovec *iov, unsigned long nr_segs, loff_t pos)
2320 unsigned long nr_segs, loff_t *ppos)
2321{ 2299{
2322 struct file *file = iocb->ki_filp; 2300 struct file *file = iocb->ki_filp;
2323 struct address_space *mapping = file->f_mapping; 2301 struct address_space *mapping = file->f_mapping;
2324 struct inode *inode = mapping->host; 2302 struct inode *inode = mapping->host;
2325 ssize_t ret; 2303 ssize_t ret;
2326 loff_t pos = *ppos;
2327 2304
2328 ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos); 2305 BUG_ON(iocb->ki_pos != pos);
2306
2307 ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs,
2308 &iocb->ki_pos);
2329 2309
2330 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2310 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2331 int err; 2311 ssize_t err;
2332 2312
2333 err = sync_page_range_nolock(inode, mapping, pos, ret); 2313 err = sync_page_range_nolock(inode, mapping, pos, ret);
2334 if (err < 0) 2314 if (err < 0)
@@ -2336,51 +2316,21 @@ generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
2336 } 2316 }
2337 return ret; 2317 return ret;
2338} 2318}
2319EXPORT_SYMBOL(generic_file_aio_write_nolock);
2339 2320
2340static ssize_t 2321ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2341__generic_file_write_nolock(struct file *file, const struct iovec *iov, 2322 unsigned long nr_segs, loff_t pos)
2342 unsigned long nr_segs, loff_t *ppos)
2343{
2344 struct kiocb kiocb;
2345 ssize_t ret;
2346
2347 init_sync_kiocb(&kiocb, file);
2348 ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
2349 if (ret == -EIOCBQUEUED)
2350 ret = wait_on_sync_kiocb(&kiocb);
2351 return ret;
2352}
2353
2354ssize_t
2355generic_file_write_nolock(struct file *file, const struct iovec *iov,
2356 unsigned long nr_segs, loff_t *ppos)
2357{
2358 struct kiocb kiocb;
2359 ssize_t ret;
2360
2361 init_sync_kiocb(&kiocb, file);
2362 ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
2363 if (-EIOCBQUEUED == ret)
2364 ret = wait_on_sync_kiocb(&kiocb);
2365 return ret;
2366}
2367EXPORT_SYMBOL(generic_file_write_nolock);
2368
2369ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
2370 size_t count, loff_t pos)
2371{ 2323{
2372 struct file *file = iocb->ki_filp; 2324 struct file *file = iocb->ki_filp;
2373 struct address_space *mapping = file->f_mapping; 2325 struct address_space *mapping = file->f_mapping;
2374 struct inode *inode = mapping->host; 2326 struct inode *inode = mapping->host;
2375 ssize_t ret; 2327 ssize_t ret;
2376 struct iovec local_iov = { .iov_base = (void __user *)buf,
2377 .iov_len = count };
2378 2328
2379 BUG_ON(iocb->ki_pos != pos); 2329 BUG_ON(iocb->ki_pos != pos);
2380 2330
2381 mutex_lock(&inode->i_mutex); 2331 mutex_lock(&inode->i_mutex);
2382 ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1, 2332 ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs,
2383 &iocb->ki_pos); 2333 &iocb->ki_pos);
2384 mutex_unlock(&inode->i_mutex); 2334 mutex_unlock(&inode->i_mutex);
2385 2335
2386 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2336 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
@@ -2394,66 +2344,6 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf,
2394} 2344}
2395EXPORT_SYMBOL(generic_file_aio_write); 2345EXPORT_SYMBOL(generic_file_aio_write);
2396 2346
2397ssize_t generic_file_write(struct file *file, const char __user *buf,
2398 size_t count, loff_t *ppos)
2399{
2400 struct address_space *mapping = file->f_mapping;
2401 struct inode *inode = mapping->host;
2402 ssize_t ret;
2403 struct iovec local_iov = { .iov_base = (void __user *)buf,
2404 .iov_len = count };
2405
2406 mutex_lock(&inode->i_mutex);
2407 ret = __generic_file_write_nolock(file, &local_iov, 1, ppos);
2408 mutex_unlock(&inode->i_mutex);
2409
2410 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2411 ssize_t err;
2412
2413 err = sync_page_range(inode, mapping, *ppos - ret, ret);
2414 if (err < 0)
2415 ret = err;
2416 }
2417 return ret;
2418}
2419EXPORT_SYMBOL(generic_file_write);
2420
2421ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
2422 unsigned long nr_segs, loff_t *ppos)
2423{
2424 struct kiocb kiocb;
2425 ssize_t ret;
2426
2427 init_sync_kiocb(&kiocb, filp);
2428 ret = __generic_file_aio_read(&kiocb, iov, nr_segs, ppos);
2429 if (-EIOCBQUEUED == ret)
2430 ret = wait_on_sync_kiocb(&kiocb);
2431 return ret;
2432}
2433EXPORT_SYMBOL(generic_file_readv);
2434
2435ssize_t generic_file_writev(struct file *file, const struct iovec *iov,
2436 unsigned long nr_segs, loff_t *ppos)
2437{
2438 struct address_space *mapping = file->f_mapping;
2439 struct inode *inode = mapping->host;
2440 ssize_t ret;
2441
2442 mutex_lock(&inode->i_mutex);
2443 ret = __generic_file_write_nolock(file, iov, nr_segs, ppos);
2444 mutex_unlock(&inode->i_mutex);
2445
2446 if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2447 int err;
2448
2449 err = sync_page_range(inode, mapping, *ppos - ret, ret);
2450 if (err < 0)
2451 ret = err;
2452 }
2453 return ret;
2454}
2455EXPORT_SYMBOL(generic_file_writev);
2456
2457/* 2347/*
2458 * Called under i_mutex for writes to S_ISREG files. Returns -EIO if something 2348 * Called under i_mutex for writes to S_ISREG files. Returns -EIO if something
2459 * went wrong during pagecache shootdown. 2349 * went wrong during pagecache shootdown.
@@ -2493,3 +2383,33 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
2493 } 2383 }
2494 return retval; 2384 return retval;
2495} 2385}
2386
2387/**
2388 * try_to_release_page() - release old fs-specific metadata on a page
2389 *
2390 * @page: the page which the kernel is trying to free
2391 * @gfp_mask: memory allocation flags (and I/O mode)
2392 *
2393 * The address_space is to try to release any data against the page
2394 * (presumably at page->private). If the release was successful, return `1'.
2395 * Otherwise return zero.
2396 *
2397 * The @gfp_mask argument specifies whether I/O may be performed to release
2398 * this page (__GFP_IO), and whether the call may block (__GFP_WAIT).
2399 *
2400 * NOTE: @gfp_mask may go away, and this function may become non-blocking.
2401 */
2402int try_to_release_page(struct page *page, gfp_t gfp_mask)
2403{
2404 struct address_space * const mapping = page->mapping;
2405
2406 BUG_ON(!PageLocked(page));
2407 if (PageWriteback(page))
2408 return 0;
2409
2410 if (mapping && mapping->a_ops->releasepage)
2411 return mapping->a_ops->releasepage(page, gfp_mask);
2412 return try_to_free_buffers(page);
2413}
2414
2415EXPORT_SYMBOL(try_to_release_page);
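
Illustrative sketch, not part of the patch: with generic_file_read(), generic_file_write(), generic_file_readv() and generic_file_writev() gone, filesystems are expected to wire the vectored aio entry points directly and rely on the synchronous wrappers in fs/read_write.c. Assuming the usual do_sync_read()/do_sync_write() helpers, the resulting file_operations looks roughly like this (the struct name is a placeholder):

static const struct file_operations example_file_operations = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,		/* sync wrapper around ->aio_read */
	.write		= do_sync_write,	/* sync wrapper around ->aio_write */
	.aio_read	= generic_file_aio_read,
	.aio_write	= generic_file_aio_write,
	.mmap		= generic_file_mmap,
};
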
diff --git a/mm/fremap.c b/mm/fremap.c
index aa30618ec6b2..7a9d0f5d246d 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -39,7 +39,7 @@ static int zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
39 } else { 39 } else {
40 if (!pte_file(pte)) 40 if (!pte_file(pte))
41 free_swap_and_cache(pte_to_swp_entry(pte)); 41 free_swap_and_cache(pte_to_swp_entry(pte));
42 pte_clear(mm, addr, ptep); 42 pte_clear_not_present_full(mm, addr, ptep, 0);
43 } 43 }
44 return !!page; 44 return !!page;
45} 45}
diff --git a/mm/highmem.c b/mm/highmem.c
index ee5519b176ee..0206e7e5018c 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -29,13 +29,6 @@
29#include <linux/blktrace_api.h> 29#include <linux/blktrace_api.h>
30#include <asm/tlbflush.h> 30#include <asm/tlbflush.h>
31 31
32static mempool_t *page_pool, *isa_page_pool;
33
34static void *mempool_alloc_pages_isa(gfp_t gfp_mask, void *data)
35{
36 return mempool_alloc_pages(gfp_mask | GFP_DMA, data);
37}
38
39/* 32/*
40 * Virtual_count is not a pure "count". 33 * Virtual_count is not a pure "count".
41 * 0 means that it is not mapped, and has not been mapped 34 * 0 means that it is not mapped, and has not been mapped
@@ -217,282 +210,8 @@ void fastcall kunmap_high(struct page *page)
217} 210}
218 211
219EXPORT_SYMBOL(kunmap_high); 212EXPORT_SYMBOL(kunmap_high);
220
221#define POOL_SIZE 64
222
223static __init int init_emergency_pool(void)
224{
225 struct sysinfo i;
226 si_meminfo(&i);
227 si_swapinfo(&i);
228
229 if (!i.totalhigh)
230 return 0;
231
232 page_pool = mempool_create_page_pool(POOL_SIZE, 0);
233 BUG_ON(!page_pool);
234 printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
235
236 return 0;
237}
238
239__initcall(init_emergency_pool);
240
241/*
242 * highmem version, map in to vec
243 */
244static void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
245{
246 unsigned long flags;
247 unsigned char *vto;
248
249 local_irq_save(flags);
250 vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
251 memcpy(vto + to->bv_offset, vfrom, to->bv_len);
252 kunmap_atomic(vto, KM_BOUNCE_READ);
253 local_irq_restore(flags);
254}
255
256#else /* CONFIG_HIGHMEM */
257
258#define bounce_copy_vec(to, vfrom) \
259 memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
260
261#endif 213#endif
262 214
263#define ISA_POOL_SIZE 16
264
265/*
266 * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
267 * as the max address, so check if the pool has already been created.
268 */
269int init_emergency_isa_pool(void)
270{
271 if (isa_page_pool)
272 return 0;
273
274 isa_page_pool = mempool_create(ISA_POOL_SIZE, mempool_alloc_pages_isa,
275 mempool_free_pages, (void *) 0);
276 BUG_ON(!isa_page_pool);
277
278 printk("isa bounce pool size: %d pages\n", ISA_POOL_SIZE);
279 return 0;
280}
281
282/*
283 * Simple bounce buffer support for highmem pages. Depending on the
284 * queue gfp mask set, *to may or may not be a highmem page. kmap it
285 * always, it will do the Right Thing
286 */
287static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
288{
289 unsigned char *vfrom;
290 struct bio_vec *tovec, *fromvec;
291 int i;
292
293 __bio_for_each_segment(tovec, to, i, 0) {
294 fromvec = from->bi_io_vec + i;
295
296 /*
297 * not bounced
298 */
299 if (tovec->bv_page == fromvec->bv_page)
300 continue;
301
302 /*
303 * fromvec->bv_offset and fromvec->bv_len might have been
304 * modified by the block layer, so use the original copy,
305 * bounce_copy_vec already uses tovec->bv_len
306 */
307 vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
308
309 flush_dcache_page(tovec->bv_page);
310 bounce_copy_vec(tovec, vfrom);
311 }
312}
313
314static void bounce_end_io(struct bio *bio, mempool_t *pool, int err)
315{
316 struct bio *bio_orig = bio->bi_private;
317 struct bio_vec *bvec, *org_vec;
318 int i;
319
320 if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
321 set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
322
323 /*
324 * free up bounce indirect pages used
325 */
326 __bio_for_each_segment(bvec, bio, i, 0) {
327 org_vec = bio_orig->bi_io_vec + i;
328 if (bvec->bv_page == org_vec->bv_page)
329 continue;
330
331 dec_zone_page_state(bvec->bv_page, NR_BOUNCE);
332 mempool_free(bvec->bv_page, pool);
333 }
334
335 bio_endio(bio_orig, bio_orig->bi_size, err);
336 bio_put(bio);
337}
338
339static int bounce_end_io_write(struct bio *bio, unsigned int bytes_done, int err)
340{
341 if (bio->bi_size)
342 return 1;
343
344 bounce_end_io(bio, page_pool, err);
345 return 0;
346}
347
348static int bounce_end_io_write_isa(struct bio *bio, unsigned int bytes_done, int err)
349{
350 if (bio->bi_size)
351 return 1;
352
353 bounce_end_io(bio, isa_page_pool, err);
354 return 0;
355}
356
357static void __bounce_end_io_read(struct bio *bio, mempool_t *pool, int err)
358{
359 struct bio *bio_orig = bio->bi_private;
360
361 if (test_bit(BIO_UPTODATE, &bio->bi_flags))
362 copy_to_high_bio_irq(bio_orig, bio);
363
364 bounce_end_io(bio, pool, err);
365}
366
367static int bounce_end_io_read(struct bio *bio, unsigned int bytes_done, int err)
368{
369 if (bio->bi_size)
370 return 1;
371
372 __bounce_end_io_read(bio, page_pool, err);
373 return 0;
374}
375
376static int bounce_end_io_read_isa(struct bio *bio, unsigned int bytes_done, int err)
377{
378 if (bio->bi_size)
379 return 1;
380
381 __bounce_end_io_read(bio, isa_page_pool, err);
382 return 0;
383}
384
385static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
386 mempool_t *pool)
387{
388 struct page *page;
389 struct bio *bio = NULL;
390 int i, rw = bio_data_dir(*bio_orig);
391 struct bio_vec *to, *from;
392
393 bio_for_each_segment(from, *bio_orig, i) {
394 page = from->bv_page;
395
396 /*
397 * is destination page below bounce pfn?
398 */
399 if (page_to_pfn(page) < q->bounce_pfn)
400 continue;
401
402 /*
403 * irk, bounce it
404 */
405 if (!bio)
406 bio = bio_alloc(GFP_NOIO, (*bio_orig)->bi_vcnt);
407
408 to = bio->bi_io_vec + i;
409
410 to->bv_page = mempool_alloc(pool, q->bounce_gfp);
411 to->bv_len = from->bv_len;
412 to->bv_offset = from->bv_offset;
413 inc_zone_page_state(to->bv_page, NR_BOUNCE);
414
415 if (rw == WRITE) {
416 char *vto, *vfrom;
417
418 flush_dcache_page(from->bv_page);
419 vto = page_address(to->bv_page) + to->bv_offset;
420 vfrom = kmap(from->bv_page) + from->bv_offset;
421 memcpy(vto, vfrom, to->bv_len);
422 kunmap(from->bv_page);
423 }
424 }
425
426 /*
427 * no pages bounced
428 */
429 if (!bio)
430 return;
431
432 /*
433 * at least one page was bounced, fill in possible non-highmem
434 * pages
435 */
436 __bio_for_each_segment(from, *bio_orig, i, 0) {
437 to = bio_iovec_idx(bio, i);
438 if (!to->bv_page) {
439 to->bv_page = from->bv_page;
440 to->bv_len = from->bv_len;
441 to->bv_offset = from->bv_offset;
442 }
443 }
444
445 bio->bi_bdev = (*bio_orig)->bi_bdev;
446 bio->bi_flags |= (1 << BIO_BOUNCED);
447 bio->bi_sector = (*bio_orig)->bi_sector;
448 bio->bi_rw = (*bio_orig)->bi_rw;
449
450 bio->bi_vcnt = (*bio_orig)->bi_vcnt;
451 bio->bi_idx = (*bio_orig)->bi_idx;
452 bio->bi_size = (*bio_orig)->bi_size;
453
454 if (pool == page_pool) {
455 bio->bi_end_io = bounce_end_io_write;
456 if (rw == READ)
457 bio->bi_end_io = bounce_end_io_read;
458 } else {
459 bio->bi_end_io = bounce_end_io_write_isa;
460 if (rw == READ)
461 bio->bi_end_io = bounce_end_io_read_isa;
462 }
463
464 bio->bi_private = *bio_orig;
465 *bio_orig = bio;
466}
467
468void blk_queue_bounce(request_queue_t *q, struct bio **bio_orig)
469{
470 mempool_t *pool;
471
472 /*
473 * for non-isa bounce case, just check if the bounce pfn is equal
474 * to or bigger than the highest pfn in the system -- in that case,
475 * don't waste time iterating over bio segments
476 */
477 if (!(q->bounce_gfp & GFP_DMA)) {
478 if (q->bounce_pfn >= blk_max_pfn)
479 return;
480 pool = page_pool;
481 } else {
482 BUG_ON(!isa_page_pool);
483 pool = isa_page_pool;
484 }
485
486 blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE);
487
488 /*
489 * slow path
490 */
491 __blk_queue_bounce(q, bio_orig, pool);
492}
493
494EXPORT_SYMBOL(blk_queue_bounce);
495
496#if defined(HASHED_PAGE_VIRTUAL) 215#if defined(HASHED_PAGE_VIRTUAL)
497 216
498#define PA_HASH_ORDER 7 217#define PA_HASH_ORDER 7
diff --git a/mm/memory.c b/mm/memory.c
index 601159a46ab6..9cf3f341a28a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -467,7 +467,7 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
467 */ 467 */
468 if (is_cow_mapping(vm_flags)) { 468 if (is_cow_mapping(vm_flags)) {
469 ptep_set_wrprotect(src_mm, addr, src_pte); 469 ptep_set_wrprotect(src_mm, addr, src_pte);
470 pte = *src_pte; 470 pte = pte_wrprotect(pte);
471 } 471 }
472 472
473 /* 473 /*
@@ -506,6 +506,7 @@ again:
506 src_pte = pte_offset_map_nested(src_pmd, addr); 506 src_pte = pte_offset_map_nested(src_pmd, addr);
507 src_ptl = pte_lockptr(src_mm, src_pmd); 507 src_ptl = pte_lockptr(src_mm, src_pmd);
508 spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); 508 spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
509 arch_enter_lazy_mmu_mode();
509 510
510 do { 511 do {
511 /* 512 /*
@@ -527,6 +528,7 @@ again:
527 progress += 8; 528 progress += 8;
528 } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end); 529 } while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
529 530
531 arch_leave_lazy_mmu_mode();
530 spin_unlock(src_ptl); 532 spin_unlock(src_ptl);
531 pte_unmap_nested(src_pte - 1); 533 pte_unmap_nested(src_pte - 1);
532 add_mm_rss(dst_mm, rss[0], rss[1]); 534 add_mm_rss(dst_mm, rss[0], rss[1]);
@@ -628,6 +630,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
628 int anon_rss = 0; 630 int anon_rss = 0;
629 631
630 pte = pte_offset_map_lock(mm, pmd, addr, &ptl); 632 pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
633 arch_enter_lazy_mmu_mode();
631 do { 634 do {
632 pte_t ptent = *pte; 635 pte_t ptent = *pte;
633 if (pte_none(ptent)) { 636 if (pte_none(ptent)) {
@@ -690,10 +693,11 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
690 continue; 693 continue;
691 if (!pte_file(ptent)) 694 if (!pte_file(ptent))
692 free_swap_and_cache(pte_to_swp_entry(ptent)); 695 free_swap_and_cache(pte_to_swp_entry(ptent));
693 pte_clear_full(mm, addr, pte, tlb->fullmm); 696 pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
694 } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0)); 697 } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
695 698
696 add_mm_rss(mm, file_rss, anon_rss); 699 add_mm_rss(mm, file_rss, anon_rss);
700 arch_leave_lazy_mmu_mode();
697 pte_unmap_unlock(pte - 1, ptl); 701 pte_unmap_unlock(pte - 1, ptl);
698 702
699 return addr; 703 return addr;
@@ -1109,6 +1113,7 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
1109 pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); 1113 pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
1110 if (!pte) 1114 if (!pte)
1111 return -ENOMEM; 1115 return -ENOMEM;
1116 arch_enter_lazy_mmu_mode();
1112 do { 1117 do {
1113 struct page *page = ZERO_PAGE(addr); 1118 struct page *page = ZERO_PAGE(addr);
1114 pte_t zero_pte = pte_wrprotect(mk_pte(page, prot)); 1119 pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
@@ -1118,6 +1123,7 @@ static int zeromap_pte_range(struct mm_struct *mm, pmd_t *pmd,
1118 BUG_ON(!pte_none(*pte)); 1123 BUG_ON(!pte_none(*pte));
1119 set_pte_at(mm, addr, pte, zero_pte); 1124 set_pte_at(mm, addr, pte, zero_pte);
1120 } while (pte++, addr += PAGE_SIZE, addr != end); 1125 } while (pte++, addr += PAGE_SIZE, addr != end);
1126 arch_leave_lazy_mmu_mode();
1121 pte_unmap_unlock(pte - 1, ptl); 1127 pte_unmap_unlock(pte - 1, ptl);
1122 return 0; 1128 return 0;
1123} 1129}
@@ -1275,11 +1281,13 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
1275 pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); 1281 pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
1276 if (!pte) 1282 if (!pte)
1277 return -ENOMEM; 1283 return -ENOMEM;
1284 arch_enter_lazy_mmu_mode();
1278 do { 1285 do {
1279 BUG_ON(!pte_none(*pte)); 1286 BUG_ON(!pte_none(*pte));
1280 set_pte_at(mm, addr, pte, pfn_pte(pfn, prot)); 1287 set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
1281 pfn++; 1288 pfn++;
1282 } while (pte++, addr += PAGE_SIZE, addr != end); 1289 } while (pte++, addr += PAGE_SIZE, addr != end);
1290 arch_leave_lazy_mmu_mode();
1283 pte_unmap_unlock(pte - 1, ptl); 1291 pte_unmap_unlock(pte - 1, ptl);
1284 return 0; 1292 return 0;
1285} 1293}
@@ -1577,7 +1585,14 @@ gotten:
1577 entry = mk_pte(new_page, vma->vm_page_prot); 1585 entry = mk_pte(new_page, vma->vm_page_prot);
1578 entry = maybe_mkwrite(pte_mkdirty(entry), vma); 1586 entry = maybe_mkwrite(pte_mkdirty(entry), vma);
1579 lazy_mmu_prot_update(entry); 1587 lazy_mmu_prot_update(entry);
1580 ptep_establish(vma, address, page_table, entry); 1588 /*
1589 * Clear the pte entry and flush it first, before updating the
1590 * pte with the new entry. This will avoid a race condition
1591 * seen in the presence of one thread doing SMC and another
1592 * thread doing COW.
1593 */
1594 ptep_clear_flush(vma, address, page_table);
1595 set_pte_at(mm, address, page_table, entry);
1581 update_mmu_cache(vma, address, entry); 1596 update_mmu_cache(vma, address, entry);
1582 lru_cache_add_active(new_page); 1597 lru_cache_add_active(new_page);
1583 page_add_new_anon_rmap(new_page, vma, address); 1598 page_add_new_anon_rmap(new_page, vma, address);
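
The memory.c hunks share one pattern: each PTE loop is now bracketed by arch_enter_lazy_mmu_mode()/arch_leave_lazy_mmu_mode(), so that a paravirtualized architecture can queue the PTE updates and flush them in one batch. A minimal sketch of that pattern, mirroring the loops above (illustrative only; the function itself is hypothetical):

static void example_clear_pte_range(struct mm_struct *mm, pmd_t *pmd,
				    unsigned long addr, unsigned long end)
{
	spinlock_t *ptl;
	pte_t *pte;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	arch_enter_lazy_mmu_mode();		/* start batching PTE updates */
	do {
		pte_clear(mm, addr, pte);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();		/* flush the queued updates */
	pte_unmap_unlock(pte - 1, ptl);
}
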
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c37319542b70..fd678a662eae 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -13,6 +13,7 @@
13#include <linux/compiler.h> 13#include <linux/compiler.h>
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/pagevec.h> 15#include <linux/pagevec.h>
16#include <linux/writeback.h>
16#include <linux/slab.h> 17#include <linux/slab.h>
17#include <linux/sysctl.h> 18#include <linux/sysctl.h>
18#include <linux/cpu.h> 19#include <linux/cpu.h>
@@ -21,11 +22,41 @@
21#include <linux/highmem.h> 22#include <linux/highmem.h>
22#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
23#include <linux/ioport.h> 24#include <linux/ioport.h>
25#include <linux/cpuset.h>
24 26
25#include <asm/tlbflush.h> 27#include <asm/tlbflush.h>
26 28
27extern void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn, 29/* add this memory to iomem resource */
28 unsigned long size); 30static struct resource *register_memory_resource(u64 start, u64 size)
31{
32 struct resource *res;
33 res = kzalloc(sizeof(struct resource), GFP_KERNEL);
34 BUG_ON(!res);
35
36 res->name = "System RAM";
37 res->start = start;
38 res->end = start + size - 1;
39 res->flags = IORESOURCE_MEM;
40 if (request_resource(&iomem_resource, res) < 0) {
41 printk("System RAM resource %llx - %llx cannot be added\n",
42 (unsigned long long)res->start, (unsigned long long)res->end);
43 kfree(res);
44 res = NULL;
45 }
46 return res;
47}
48
49static void release_memory_resource(struct resource *res)
50{
51 if (!res)
52 return;
53 release_resource(res);
54 kfree(res);
55 return;
56}
57
58
59#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
29static int __add_zone(struct zone *zone, unsigned long phys_start_pfn) 60static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
30{ 61{
31 struct pglist_data *pgdat = zone->zone_pgdat; 62 struct pglist_data *pgdat = zone->zone_pgdat;
@@ -45,8 +76,6 @@ static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
45 return 0; 76 return 0;
46} 77}
47 78
48extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
49 int nr_pages);
50static int __add_section(struct zone *zone, unsigned long phys_start_pfn) 79static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
51{ 80{
52 int nr_pages = PAGES_PER_SECTION; 81 int nr_pages = PAGES_PER_SECTION;
@@ -191,8 +220,10 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
191 if (need_zonelists_rebuild) 220 if (need_zonelists_rebuild)
192 build_all_zonelists(); 221 build_all_zonelists();
193 vm_total_pages = nr_free_pagecache_pages(); 222 vm_total_pages = nr_free_pagecache_pages();
223 writeback_set_ratelimit();
194 return 0; 224 return 0;
195} 225}
226#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
196 227
197static pg_data_t *hotadd_new_pgdat(int nid, u64 start) 228static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
198{ 229{
@@ -222,36 +253,6 @@ static void rollback_node_hotadd(int nid, pg_data_t *pgdat)
222 return; 253 return;
223} 254}
224 255
225/* add this memory to iomem resource */
226static struct resource *register_memory_resource(u64 start, u64 size)
227{
228 struct resource *res;
229 res = kzalloc(sizeof(struct resource), GFP_KERNEL);
230 BUG_ON(!res);
231
232 res->name = "System RAM";
233 res->start = start;
234 res->end = start + size - 1;
235 res->flags = IORESOURCE_MEM;
236 if (request_resource(&iomem_resource, res) < 0) {
237 printk("System RAM resource %llx - %llx cannot be added\n",
238 (unsigned long long)res->start, (unsigned long long)res->end);
239 kfree(res);
240 res = NULL;
241 }
242 return res;
243}
244
245static void release_memory_resource(struct resource *res)
246{
247 if (!res)
248 return;
249 release_resource(res);
250 kfree(res);
251 return;
252}
253
254
255 256
256int add_memory(int nid, u64 start, u64 size) 257int add_memory(int nid, u64 start, u64 size)
257{ 258{
@@ -283,6 +284,8 @@ int add_memory(int nid, u64 start, u64 size)
283 /* we online node here. we can't roll back from here. */ 284 /* we online node here. we can't roll back from here. */
284 node_set_online(nid); 285 node_set_online(nid);
285 286
287 cpuset_track_online_nodes();
288
286 if (new_pgdat) { 289 if (new_pgdat) {
287 ret = register_one_node(nid); 290 ret = register_one_node(nid);
288 /* 291 /*
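
register_memory_resource() and release_memory_resource() only move above the new CONFIG_MEMORY_HOTPLUG_SPARSE block so that add_memory(), which stays outside it, can keep using them. For orientation, a hot-add caller (the real ones live in the ACPI and architecture probe paths) uses the interface roughly as below; the function shown is a hypothetical sketch:

static int example_probe_memory(int nid, u64 start, u64 size)
{
	int ret;

	/* claims the "System RAM" resource and creates the new sections */
	ret = add_memory(nid, start, size);
	if (ret)
		return ret;

	/* the added sections are brought online later via online_pages() */
	return 0;
}
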
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index cf18f0942553..25788b1b7fcf 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1324,12 +1324,11 @@ struct mempolicy *__mpol_copy(struct mempolicy *old)
1324 atomic_set(&new->refcnt, 1); 1324 atomic_set(&new->refcnt, 1);
1325 if (new->policy == MPOL_BIND) { 1325 if (new->policy == MPOL_BIND) {
1326 int sz = ksize(old->v.zonelist); 1326 int sz = ksize(old->v.zonelist);
1327 new->v.zonelist = kmalloc(sz, SLAB_KERNEL); 1327 new->v.zonelist = kmemdup(old->v.zonelist, sz, SLAB_KERNEL);
1328 if (!new->v.zonelist) { 1328 if (!new->v.zonelist) {
1329 kmem_cache_free(policy_cache, new); 1329 kmem_cache_free(policy_cache, new);
1330 return ERR_PTR(-ENOMEM); 1330 return ERR_PTR(-ENOMEM);
1331 } 1331 }
1332 memcpy(new->v.zonelist, old->v.zonelist, sz);
1333 } 1332 }
1334 return new; 1333 return new;
1335} 1334}
diff --git a/mm/migrate.c b/mm/migrate.c
index 20a8c2687b1e..ba2453f9483d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -409,6 +409,7 @@ int migrate_page(struct address_space *mapping,
409} 409}
410EXPORT_SYMBOL(migrate_page); 410EXPORT_SYMBOL(migrate_page);
411 411
412#ifdef CONFIG_BLOCK
412/* 413/*
413 * Migration function for pages with buffers. This function can only be used 414 * Migration function for pages with buffers. This function can only be used
414 * if the underlying filesystem guarantees that no other references to "page" 415 * if the underlying filesystem guarantees that no other references to "page"
@@ -466,6 +467,7 @@ int buffer_migrate_page(struct address_space *mapping,
466 return 0; 467 return 0;
467} 468}
468EXPORT_SYMBOL(buffer_migrate_page); 469EXPORT_SYMBOL(buffer_migrate_page);
470#endif
469 471
470/* 472/*
471 * Writeback a page to clean the dirty state 473 * Writeback a page to clean the dirty state
@@ -525,7 +527,7 @@ static int fallback_migrate_page(struct address_space *mapping,
525 * Buffers may be managed in a filesystem specific way. 527 * Buffers may be managed in a filesystem specific way.
526 * We must have no buffers or drop them. 528 * We must have no buffers or drop them.
527 */ 529 */
528 if (page_has_buffers(page) && 530 if (PagePrivate(page) &&
529 !try_to_release_page(page, GFP_KERNEL)) 531 !try_to_release_page(page, GFP_KERNEL))
530 return -EAGAIN; 532 return -EAGAIN;
531 533
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 955f9d0e38aa..3b8f3c0c63f3 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -34,6 +34,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
34 spinlock_t *ptl; 34 spinlock_t *ptl;
35 35
36 pte = pte_offset_map_lock(mm, pmd, addr, &ptl); 36 pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
37 arch_enter_lazy_mmu_mode();
37 do { 38 do {
38 oldpte = *pte; 39 oldpte = *pte;
39 if (pte_present(oldpte)) { 40 if (pte_present(oldpte)) {
@@ -70,6 +71,7 @@ static void change_pte_range(struct mm_struct *mm, pmd_t *pmd,
70 } 71 }
71 72
72 } while (pte++, addr += PAGE_SIZE, addr != end); 73 } while (pte++, addr += PAGE_SIZE, addr != end);
74 arch_leave_lazy_mmu_mode();
73 pte_unmap_unlock(pte - 1, ptl); 75 pte_unmap_unlock(pte - 1, ptl);
74} 76}
75 77
diff --git a/mm/mremap.c b/mm/mremap.c
index 7c15cf3373ad..9c769fa29f32 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -98,6 +98,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
98 new_ptl = pte_lockptr(mm, new_pmd); 98 new_ptl = pte_lockptr(mm, new_pmd);
99 if (new_ptl != old_ptl) 99 if (new_ptl != old_ptl)
100 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); 100 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
101 arch_enter_lazy_mmu_mode();
101 102
102 for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE, 103 for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
103 new_pte++, new_addr += PAGE_SIZE) { 104 new_pte++, new_addr += PAGE_SIZE) {
@@ -109,6 +110,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
109 set_pte_at(mm, new_addr, new_pte, pte); 110 set_pte_at(mm, new_addr, new_pte, pte);
110 } 111 }
111 112
113 arch_leave_lazy_mmu_mode();
112 if (new_ptl != old_ptl) 114 if (new_ptl != old_ptl)
113 spin_unlock(new_ptl); 115 spin_unlock(new_ptl);
114 pte_unmap_nested(new_pte - 1); 116 pte_unmap_nested(new_pte - 1);
diff --git a/mm/nommu.c b/mm/nommu.c
index 564540662192..365019599df8 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -948,7 +948,8 @@ unsigned long do_mmap_pgoff(struct file *file,
948 up_write(&nommu_vma_sem); 948 up_write(&nommu_vma_sem);
949 kfree(vml); 949 kfree(vml);
950 if (vma) { 950 if (vma) {
951 fput(vma->vm_file); 951 if (vma->vm_file)
952 fput(vma->vm_file);
952 kfree(vma); 953 kfree(vma);
953 } 954 }
954 return ret; 955 return ret;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index bada3d03119f..20f41b082e16 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -204,16 +204,30 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
204 do_posix_clock_monotonic_gettime(&uptime); 204 do_posix_clock_monotonic_gettime(&uptime);
205 do_each_thread(g, p) { 205 do_each_thread(g, p) {
206 unsigned long points; 206 unsigned long points;
207 int releasing;
208 207
209 /* skip kernel threads */ 208 /*
209 * skip kernel threads and tasks which have already released
210 * their mm.
211 */
210 if (!p->mm) 212 if (!p->mm)
211 continue; 213 continue;
212 /* skip the init task with pid == 1 */ 214 /* skip the init task */
213 if (p->pid == 1) 215 if (is_init(p))
214 continue; 216 continue;
215 217
216 /* 218 /*
219 * This task already has access to memory reserves and is
220 * being killed. Don't allow any other task access to the
221 * memory reserve.
222 *
223 * Note: this may have a chance of deadlock if it gets
224 * blocked waiting for another task which itself is waiting
225 * for memory. Is there a better alternative?
226 */
227 if (test_tsk_thread_flag(p, TIF_MEMDIE))
228 return ERR_PTR(-1UL);
229
230 /*
217 * This is in the process of releasing memory so wait for it 231 * This is in the process of releasing memory so wait for it
218 * to finish before killing some other task by mistake. 232 * to finish before killing some other task by mistake.
219 * 233 *
@@ -221,21 +235,16 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
221 * go ahead if it is exiting: this will simply set TIF_MEMDIE, 235 * go ahead if it is exiting: this will simply set TIF_MEMDIE,
222 * which will allow it to gain access to memory reserves in 236 * which will allow it to gain access to memory reserves in
223 * the process of exiting and releasing its resources. 237 * the process of exiting and releasing its resources.
224 * Otherwise we could get an OOM deadlock. 238 * Otherwise we could get an easy OOM deadlock.
225 */ 239 */
226 releasing = test_tsk_thread_flag(p, TIF_MEMDIE) || 240 if (p->flags & PF_EXITING) {
227 p->flags & PF_EXITING; 241 if (p != current)
228 if (releasing) { 242 return ERR_PTR(-1UL);
229 /* PF_DEAD tasks have already released their mm */ 243
230 if (p->flags & PF_DEAD) 244 chosen = p;
231 continue; 245 *ppoints = ULONG_MAX;
232 if (p->flags & PF_EXITING && p == current) {
233 chosen = p;
234 *ppoints = ULONG_MAX;
235 break;
236 }
237 return ERR_PTR(-1UL);
238 } 246 }
247
239 if (p->oomkilladj == OOM_DISABLE) 248 if (p->oomkilladj == OOM_DISABLE)
240 continue; 249 continue;
241 250
@@ -245,6 +254,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
245 *ppoints = points; 254 *ppoints = points;
246 } 255 }
247 } while_each_thread(g, p); 256 } while_each_thread(g, p);
257
248 return chosen; 258 return chosen;
249} 259}
250 260
@@ -255,20 +265,17 @@ static struct task_struct *select_bad_process(unsigned long *ppoints)
255 */ 265 */
256static void __oom_kill_task(struct task_struct *p, const char *message) 266static void __oom_kill_task(struct task_struct *p, const char *message)
257{ 267{
258 if (p->pid == 1) { 268 if (is_init(p)) {
259 WARN_ON(1); 269 WARN_ON(1);
260 printk(KERN_WARNING "tried to kill init!\n"); 270 printk(KERN_WARNING "tried to kill init!\n");
261 return; 271 return;
262 } 272 }
263 273
264 task_lock(p); 274 if (!p->mm) {
265 if (!p->mm || p->mm == &init_mm) {
266 WARN_ON(1); 275 WARN_ON(1);
267 printk(KERN_WARNING "tried to kill an mm-less task!\n"); 276 printk(KERN_WARNING "tried to kill an mm-less task!\n");
268 task_unlock(p);
269 return; 277 return;
270 } 278 }
271 task_unlock(p);
272 279
273 if (message) { 280 if (message) {
274 printk(KERN_ERR "%s: Killed process %d (%s).\n", 281 printk(KERN_ERR "%s: Killed process %d (%s).\n",
@@ -302,7 +309,7 @@ static int oom_kill_task(struct task_struct *p, const char *message)
302 * However, this is of no concern to us. 309 * However, this is of no concern to us.
303 */ 310 */
304 311
305 if (mm == NULL || mm == &init_mm) 312 if (mm == NULL)
306 return 1; 313 return 1;
307 314
308 __oom_kill_task(p, message); 315 __oom_kill_task(p, message);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 555752907dc3..c0d4ce144dec 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -30,6 +30,8 @@
30#include <linux/sysctl.h> 30#include <linux/sysctl.h>
31#include <linux/cpu.h> 31#include <linux/cpu.h>
32#include <linux/syscalls.h> 32#include <linux/syscalls.h>
33#include <linux/buffer_head.h>
34#include <linux/pagevec.h>
33 35
34/* 36/*
35 * The maximum number of pages to writeout in a single bdflush/kupdate 37 * The maximum number of pages to writeout in a single bdflush/kupdate
@@ -46,7 +48,6 @@
46 */ 48 */
47static long ratelimit_pages = 32; 49static long ratelimit_pages = 32;
48 50
49static long total_pages; /* The total number of pages in the machine. */
50static int dirty_exceeded __cacheline_aligned_in_smp; /* Dirty mem may be over limit */ 51static int dirty_exceeded __cacheline_aligned_in_smp; /* Dirty mem may be over limit */
51 52
52/* 53/*
@@ -126,7 +127,7 @@ get_dirty_limits(long *pbackground, long *pdirty,
126 int unmapped_ratio; 127 int unmapped_ratio;
127 long background; 128 long background;
128 long dirty; 129 long dirty;
129 unsigned long available_memory = total_pages; 130 unsigned long available_memory = vm_total_pages;
130 struct task_struct *tsk; 131 struct task_struct *tsk;
131 132
132#ifdef CONFIG_HIGHMEM 133#ifdef CONFIG_HIGHMEM
@@ -141,7 +142,7 @@ get_dirty_limits(long *pbackground, long *pdirty,
141 142
142 unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) + 143 unmapped_ratio = 100 - ((global_page_state(NR_FILE_MAPPED) +
143 global_page_state(NR_ANON_PAGES)) * 100) / 144 global_page_state(NR_ANON_PAGES)) * 100) /
144 total_pages; 145 vm_total_pages;
145 146
146 dirty_ratio = vm_dirty_ratio; 147 dirty_ratio = vm_dirty_ratio;
147 if (dirty_ratio > unmapped_ratio / 2) 148 if (dirty_ratio > unmapped_ratio / 2)
@@ -502,9 +503,9 @@ void laptop_sync_completion(void)
502 * will write six megabyte chunks, max. 503 * will write six megabyte chunks, max.
503 */ 504 */
504 505
505static void set_ratelimit(void) 506void writeback_set_ratelimit(void)
506{ 507{
507 ratelimit_pages = total_pages / (num_online_cpus() * 32); 508 ratelimit_pages = vm_total_pages / (num_online_cpus() * 32);
508 if (ratelimit_pages < 16) 509 if (ratelimit_pages < 16)
509 ratelimit_pages = 16; 510 ratelimit_pages = 16;
510 if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024) 511 if (ratelimit_pages * PAGE_CACHE_SIZE > 4096 * 1024)
@@ -514,7 +515,7 @@ static void set_ratelimit(void)
514static int __cpuinit 515static int __cpuinit
515ratelimit_handler(struct notifier_block *self, unsigned long u, void *v) 516ratelimit_handler(struct notifier_block *self, unsigned long u, void *v)
516{ 517{
517 set_ratelimit(); 518 writeback_set_ratelimit();
518 return 0; 519 return 0;
519} 520}
520 521
@@ -533,9 +534,7 @@ void __init page_writeback_init(void)
533 long buffer_pages = nr_free_buffer_pages(); 534 long buffer_pages = nr_free_buffer_pages();
534 long correction; 535 long correction;
535 536
536 total_pages = nr_free_pagecache_pages(); 537 correction = (100 * 4 * buffer_pages) / vm_total_pages;
537
538 correction = (100 * 4 * buffer_pages) / total_pages;
539 538
540 if (correction < 100) { 539 if (correction < 100) {
541 dirty_background_ratio *= correction; 540 dirty_background_ratio *= correction;
@@ -549,10 +548,143 @@ void __init page_writeback_init(void)
549 vm_dirty_ratio = 1; 548 vm_dirty_ratio = 1;
550 } 549 }
551 mod_timer(&wb_timer, jiffies + dirty_writeback_interval); 550 mod_timer(&wb_timer, jiffies + dirty_writeback_interval);
552 set_ratelimit(); 551 writeback_set_ratelimit();
553 register_cpu_notifier(&ratelimit_nb); 552 register_cpu_notifier(&ratelimit_nb);
554} 553}
555 554
555/**
556 * generic_writepages - walk the list of dirty pages of the given
557 * address space and writepage() all of them.
558 *
559 * @mapping: address space structure to write
560 * @wbc: subtract the number of written pages from *@wbc->nr_to_write
561 *
562 * This is a library function, which implements the writepages()
563 * address_space_operation.
564 *
565 * If a page is already under I/O, generic_writepages() skips it, even
566 * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
567 * but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
568 * and msync() need to guarantee that all the data which was dirty at the time
569 * the call was made get new I/O started against them. If wbc->sync_mode is
570 * WB_SYNC_ALL then we were called for data integrity and we must wait for
571 * existing IO to complete.
572 *
573 * Derived from mpage_writepages() - if you fix this you should check that
574 * also!
575 */
576int generic_writepages(struct address_space *mapping,
577 struct writeback_control *wbc)
578{
579 struct backing_dev_info *bdi = mapping->backing_dev_info;
580 int ret = 0;
581 int done = 0;
582 int (*writepage)(struct page *page, struct writeback_control *wbc);
583 struct pagevec pvec;
584 int nr_pages;
585 pgoff_t index;
586 pgoff_t end; /* Inclusive */
587 int scanned = 0;
588 int range_whole = 0;
589
590 if (wbc->nonblocking && bdi_write_congested(bdi)) {
591 wbc->encountered_congestion = 1;
592 return 0;
593 }
594
595 writepage = mapping->a_ops->writepage;
596
597 /* deal with chardevs and other special file */
598 if (!writepage)
599 return 0;
600
601 pagevec_init(&pvec, 0);
602 if (wbc->range_cyclic) {
603 index = mapping->writeback_index; /* Start from prev offset */
604 end = -1;
605 } else {
606 index = wbc->range_start >> PAGE_CACHE_SHIFT;
607 end = wbc->range_end >> PAGE_CACHE_SHIFT;
608 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
609 range_whole = 1;
610 scanned = 1;
611 }
612retry:
613 while (!done && (index <= end) &&
614 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
615 PAGECACHE_TAG_DIRTY,
616 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
617 unsigned i;
618
619 scanned = 1;
620 for (i = 0; i < nr_pages; i++) {
621 struct page *page = pvec.pages[i];
622
623 /*
624 * At this point we hold neither mapping->tree_lock nor
625 * lock on the page itself: the page may be truncated or
626 * invalidated (changing page->mapping to NULL), or even
627 * swizzled back from swapper_space to tmpfs file
628 * mapping
629 */
630 lock_page(page);
631
632 if (unlikely(page->mapping != mapping)) {
633 unlock_page(page);
634 continue;
635 }
636
637 if (!wbc->range_cyclic && page->index > end) {
638 done = 1;
639 unlock_page(page);
640 continue;
641 }
642
643 if (wbc->sync_mode != WB_SYNC_NONE)
644 wait_on_page_writeback(page);
645
646 if (PageWriteback(page) ||
647 !clear_page_dirty_for_io(page)) {
648 unlock_page(page);
649 continue;
650 }
651
652 ret = (*writepage)(page, wbc);
653 if (ret) {
654 if (ret == -ENOSPC)
655 set_bit(AS_ENOSPC, &mapping->flags);
656 else
657 set_bit(AS_EIO, &mapping->flags);
658 }
659
660 if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
661 unlock_page(page);
662 if (ret || (--(wbc->nr_to_write) <= 0))
663 done = 1;
664 if (wbc->nonblocking && bdi_write_congested(bdi)) {
665 wbc->encountered_congestion = 1;
666 done = 1;
667 }
668 }
669 pagevec_release(&pvec);
670 cond_resched();
671 }
672 if (!scanned && !done) {
673 /*
674 * We hit the last page and there is more work to be done: wrap
675 * back to the start of the file
676 */
677 scanned = 1;
678 index = 0;
679 goto retry;
680 }
681 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
682 mapping->writeback_index = index;
683 return ret;
684}
685
686EXPORT_SYMBOL(generic_writepages);
687
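For illustration (not part of the patch): with generic_writepages() now exported, a filesystem that already implements ->writepage could point its ->writepages at the helper instead of relying on the default chosen in do_writepages(); examplefs_writepage below is a hypothetical callback.

	static const struct address_space_operations examplefs_aops = {
		.writepage	= examplefs_writepage,	/* hypothetical per-page writer */
		.writepages	= generic_writepages,	/* walk dirty pages, call ->writepage */
	};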
556int do_writepages(struct address_space *mapping, struct writeback_control *wbc) 688int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
557{ 689{
558 int ret; 690 int ret;
@@ -675,9 +807,11 @@ int fastcall set_page_dirty(struct page *page)
675 807
676 if (likely(mapping)) { 808 if (likely(mapping)) {
677 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty; 809 int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;
678 if (spd) 810#ifdef CONFIG_BLOCK
679 return (*spd)(page); 811 if (!spd)
680 return __set_page_dirty_buffers(page); 812 spd = __set_page_dirty_buffers;
813#endif
814 return (*spd)(page);
681 } 815 }
682 if (!PageDirty(page)) { 816 if (!PageDirty(page)) {
683 if (!TestSetPageDirty(page)) 817 if (!TestSetPageDirty(page))
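A sketch of the other side of this fallback (not from the patch): a mapping with no buffer heads would normally install its own ->set_page_dirty, for example the existing __set_page_dirty_nobuffers() helper, so the CONFIG_BLOCK-only __set_page_dirty_buffers default above is only reached for block-backed mappings.

	static const struct address_space_operations examplefs_noblock_aops = {
		.set_page_dirty	= __set_page_dirty_nobuffers,	/* no buffer heads to tag */
	};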
diff --git a/mm/shmem.c b/mm/shmem.c
index eda907c3a86a..bb8ca7ef7094 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -26,6 +26,8 @@
26#include <linux/module.h> 26#include <linux/module.h>
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/fs.h> 28#include <linux/fs.h>
29#include <linux/xattr.h>
30#include <linux/generic_acl.h>
29#include <linux/mm.h> 31#include <linux/mm.h>
30#include <linux/mman.h> 32#include <linux/mman.h>
31#include <linux/file.h> 33#include <linux/file.h>
@@ -177,6 +179,7 @@ static const struct address_space_operations shmem_aops;
177static struct file_operations shmem_file_operations; 179static struct file_operations shmem_file_operations;
178static struct inode_operations shmem_inode_operations; 180static struct inode_operations shmem_inode_operations;
179static struct inode_operations shmem_dir_inode_operations; 181static struct inode_operations shmem_dir_inode_operations;
182static struct inode_operations shmem_special_inode_operations;
180static struct vm_operations_struct shmem_vm_ops; 183static struct vm_operations_struct shmem_vm_ops;
181 184
182static struct backing_dev_info shmem_backing_dev_info __read_mostly = { 185static struct backing_dev_info shmem_backing_dev_info __read_mostly = {
@@ -637,7 +640,7 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
637 struct page *page = NULL; 640 struct page *page = NULL;
638 int error; 641 int error;
639 642
640 if (attr->ia_valid & ATTR_SIZE) { 643 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
641 if (attr->ia_size < inode->i_size) { 644 if (attr->ia_size < inode->i_size) {
642 /* 645 /*
643 * If truncating down to a partial page, then 646 * If truncating down to a partial page, then
@@ -670,6 +673,10 @@ static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
670 error = inode_change_ok(inode, attr); 673 error = inode_change_ok(inode, attr);
671 if (!error) 674 if (!error)
672 error = inode_setattr(inode, attr); 675 error = inode_setattr(inode, attr);
676#ifdef CONFIG_TMPFS_POSIX_ACL
677 if (!error && (attr->ia_valid & ATTR_MODE))
678 error = generic_acl_chmod(inode, &shmem_acl_ops);
679#endif
673 if (page) 680 if (page)
674 page_cache_release(page); 681 page_cache_release(page);
675 return error; 682 return error;
@@ -1362,6 +1369,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1362 1369
1363 switch (mode & S_IFMT) { 1370 switch (mode & S_IFMT) {
1364 default: 1371 default:
1372 inode->i_op = &shmem_special_inode_operations;
1365 init_special_inode(inode, mode, dev); 1373 init_special_inode(inode, mode, dev);
1366 break; 1374 break;
1367 case S_IFREG: 1375 case S_IFREG:
@@ -1371,7 +1379,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
1371 &sbinfo->policy_nodes); 1379 &sbinfo->policy_nodes);
1372 break; 1380 break;
1373 case S_IFDIR: 1381 case S_IFDIR:
1374 inode->i_nlink++; 1382 inc_nlink(inode);
1375 /* Some things misbehave if size == 0 on a directory */ 1383 /* Some things misbehave if size == 0 on a directory */
1376 inode->i_size = 2 * BOGO_DIRENT_SIZE; 1384 inode->i_size = 2 * BOGO_DIRENT_SIZE;
1377 inode->i_op = &shmem_dir_inode_operations; 1385 inode->i_op = &shmem_dir_inode_operations;
@@ -1682,7 +1690,11 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1682 iput(inode); 1690 iput(inode);
1683 return error; 1691 return error;
1684 } 1692 }
1685 error = 0; 1693 }
1694 error = shmem_acl_init(inode, dir);
1695 if (error) {
1696 iput(inode);
1697 return error;
1686 } 1698 }
1687 if (dir->i_mode & S_ISGID) { 1699 if (dir->i_mode & S_ISGID) {
1688 inode->i_gid = dir->i_gid; 1700 inode->i_gid = dir->i_gid;
@@ -1703,7 +1715,7 @@ static int shmem_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1703 1715
1704 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0))) 1716 if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
1705 return error; 1717 return error;
1706 dir->i_nlink++; 1718 inc_nlink(dir);
1707 return 0; 1719 return 0;
1708} 1720}
1709 1721
@@ -1738,7 +1750,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
1738 1750
1739 dir->i_size += BOGO_DIRENT_SIZE; 1751 dir->i_size += BOGO_DIRENT_SIZE;
1740 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 1752 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1741 inode->i_nlink++; 1753 inc_nlink(inode);
1742 atomic_inc(&inode->i_count); /* New dentry reference */ 1754 atomic_inc(&inode->i_count); /* New dentry reference */
1743 dget(dentry); /* Extra pinning count for the created dentry */ 1755 dget(dentry); /* Extra pinning count for the created dentry */
1744 d_instantiate(dentry, inode); 1756 d_instantiate(dentry, inode);
@@ -1760,7 +1772,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
1760 1772
1761 dir->i_size -= BOGO_DIRENT_SIZE; 1773 dir->i_size -= BOGO_DIRENT_SIZE;
1762 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; 1774 inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
1763 inode->i_nlink--; 1775 drop_nlink(inode);
1764 dput(dentry); /* Undo the count from "create" - this does all the work */ 1776 dput(dentry); /* Undo the count from "create" - this does all the work */
1765 return 0; 1777 return 0;
1766} 1778}
@@ -1770,8 +1782,8 @@ static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
1770 if (!simple_empty(dentry)) 1782 if (!simple_empty(dentry))
1771 return -ENOTEMPTY; 1783 return -ENOTEMPTY;
1772 1784
1773 dentry->d_inode->i_nlink--; 1785 drop_nlink(dentry->d_inode);
1774 dir->i_nlink--; 1786 drop_nlink(dir);
1775 return shmem_unlink(dir, dentry); 1787 return shmem_unlink(dir, dentry);
1776} 1788}
1777 1789
@@ -1792,10 +1804,10 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct
1792 if (new_dentry->d_inode) { 1804 if (new_dentry->d_inode) {
1793 (void) shmem_unlink(new_dir, new_dentry); 1805 (void) shmem_unlink(new_dir, new_dentry);
1794 if (they_are_dirs) 1806 if (they_are_dirs)
1795 old_dir->i_nlink--; 1807 drop_nlink(old_dir);
1796 } else if (they_are_dirs) { 1808 } else if (they_are_dirs) {
1797 old_dir->i_nlink--; 1809 drop_nlink(old_dir);
1798 new_dir->i_nlink++; 1810 inc_nlink(new_dir);
1799 } 1811 }
1800 1812
1801 old_dir->i_size -= BOGO_DIRENT_SIZE; 1813 old_dir->i_size -= BOGO_DIRENT_SIZE;
@@ -1897,6 +1909,53 @@ static struct inode_operations shmem_symlink_inode_operations = {
1897 .put_link = shmem_put_link, 1909 .put_link = shmem_put_link,
1898}; 1910};
1899 1911
1912#ifdef CONFIG_TMPFS_POSIX_ACL
1913/**
1914 * Superblocks without xattr inode operations will get security.* xattr
1915 * support from the VFS "for free". As soon as we have any other xattrs
1916 * like ACLs, we also need to implement the security.* handlers at
1917 * filesystem level, though.
1918 */
1919
1920static size_t shmem_xattr_security_list(struct inode *inode, char *list,
1921 size_t list_len, const char *name,
1922 size_t name_len)
1923{
1924 return security_inode_listsecurity(inode, list, list_len);
1925}
1926
1927static int shmem_xattr_security_get(struct inode *inode, const char *name,
1928 void *buffer, size_t size)
1929{
1930 if (strcmp(name, "") == 0)
1931 return -EINVAL;
1932 return security_inode_getsecurity(inode, name, buffer, size,
1933 -EOPNOTSUPP);
1934}
1935
1936static int shmem_xattr_security_set(struct inode *inode, const char *name,
1937 const void *value, size_t size, int flags)
1938{
1939 if (strcmp(name, "") == 0)
1940 return -EINVAL;
1941 return security_inode_setsecurity(inode, name, value, size, flags);
1942}
1943
1944struct xattr_handler shmem_xattr_security_handler = {
1945 .prefix = XATTR_SECURITY_PREFIX,
1946 .list = shmem_xattr_security_list,
1947 .get = shmem_xattr_security_get,
1948 .set = shmem_xattr_security_set,
1949};
1950
1951static struct xattr_handler *shmem_xattr_handlers[] = {
1952 &shmem_xattr_acl_access_handler,
1953 &shmem_xattr_acl_default_handler,
1954 &shmem_xattr_security_handler,
1955 NULL
1956};
1957#endif
1958
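By way of example (assumptions: a tmpfs mount built with CONFIG_TMPFS_POSIX_ACL and an existing file /tmp/file; not part of the patch), the names served by these handlers show up through the ordinary userspace xattr calls:

	#include <stdio.h>
	#include <string.h>
	#include <sys/xattr.h>

	int main(void)
	{
		char list[256];
		ssize_t len = listxattr("/tmp/file", list, sizeof(list));

		if (len < 0) {
			perror("listxattr");
			return 1;
		}
		/* prints names such as system.posix_acl_access once an ACL is set */
		for (ssize_t off = 0; off < len; off += strlen(list + off) + 1)
			printf("%s\n", list + off);
		return 0;
	}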
1900static int shmem_parse_options(char *options, int *mode, uid_t *uid, 1959static int shmem_parse_options(char *options, int *mode, uid_t *uid,
1901 gid_t *gid, unsigned long *blocks, unsigned long *inodes, 1960 gid_t *gid, unsigned long *blocks, unsigned long *inodes,
1902 int *policy, nodemask_t *policy_nodes) 1961 int *policy, nodemask_t *policy_nodes)
@@ -2094,6 +2153,10 @@ static int shmem_fill_super(struct super_block *sb,
2094 sb->s_magic = TMPFS_MAGIC; 2153 sb->s_magic = TMPFS_MAGIC;
2095 sb->s_op = &shmem_ops; 2154 sb->s_op = &shmem_ops;
2096 sb->s_time_gran = 1; 2155 sb->s_time_gran = 1;
2156#ifdef CONFIG_TMPFS_POSIX_ACL
2157 sb->s_xattr = shmem_xattr_handlers;
2158 sb->s_flags |= MS_POSIXACL;
2159#endif
2097 2160
2098 inode = shmem_get_inode(sb, S_IFDIR | mode, 0); 2161 inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
2099 if (!inode) 2162 if (!inode)
@@ -2130,6 +2193,7 @@ static void shmem_destroy_inode(struct inode *inode)
2130 /* only struct inode is valid if it's an inline symlink */ 2193 /* only struct inode is valid if it's an inline symlink */
2131 mpol_free_shared_policy(&SHMEM_I(inode)->policy); 2194 mpol_free_shared_policy(&SHMEM_I(inode)->policy);
2132 } 2195 }
2196 shmem_acl_destroy_inode(inode);
2133 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); 2197 kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
2134} 2198}
2135 2199
@@ -2141,6 +2205,10 @@ static void init_once(void *foo, struct kmem_cache *cachep,
2141 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 2205 if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
2142 SLAB_CTOR_CONSTRUCTOR) { 2206 SLAB_CTOR_CONSTRUCTOR) {
2143 inode_init_once(&p->vfs_inode); 2207 inode_init_once(&p->vfs_inode);
2208#ifdef CONFIG_TMPFS_POSIX_ACL
2209 p->i_acl = NULL;
2210 p->i_default_acl = NULL;
2211#endif
2144 } 2212 }
2145} 2213}
2146 2214
@@ -2184,6 +2252,14 @@ static struct inode_operations shmem_inode_operations = {
2184 .truncate = shmem_truncate, 2252 .truncate = shmem_truncate,
2185 .setattr = shmem_notify_change, 2253 .setattr = shmem_notify_change,
2186 .truncate_range = shmem_truncate_range, 2254 .truncate_range = shmem_truncate_range,
2255#ifdef CONFIG_TMPFS_POSIX_ACL
2256 .setxattr = generic_setxattr,
2257 .getxattr = generic_getxattr,
2258 .listxattr = generic_listxattr,
2259 .removexattr = generic_removexattr,
2260 .permission = shmem_permission,
2261#endif
2262
2187}; 2263};
2188 2264
2189static struct inode_operations shmem_dir_inode_operations = { 2265static struct inode_operations shmem_dir_inode_operations = {
@@ -2198,6 +2274,25 @@ static struct inode_operations shmem_dir_inode_operations = {
2198 .mknod = shmem_mknod, 2274 .mknod = shmem_mknod,
2199 .rename = shmem_rename, 2275 .rename = shmem_rename,
2200#endif 2276#endif
2277#ifdef CONFIG_TMPFS_POSIX_ACL
2278 .setattr = shmem_notify_change,
2279 .setxattr = generic_setxattr,
2280 .getxattr = generic_getxattr,
2281 .listxattr = generic_listxattr,
2282 .removexattr = generic_removexattr,
2283 .permission = shmem_permission,
2284#endif
2285};
2286
2287static struct inode_operations shmem_special_inode_operations = {
2288#ifdef CONFIG_TMPFS_POSIX_ACL
2289 .setattr = shmem_notify_change,
2290 .setxattr = generic_setxattr,
2291 .getxattr = generic_getxattr,
2292 .listxattr = generic_listxattr,
2293 .removexattr = generic_removexattr,
2294 .permission = shmem_permission,
2295#endif
2201}; 2296};
2202 2297
2203static struct super_operations shmem_ops = { 2298static struct super_operations shmem_ops = {
diff --git a/mm/shmem_acl.c b/mm/shmem_acl.c
new file mode 100644
index 000000000000..c946bf468718
--- /dev/null
+++ b/mm/shmem_acl.c
@@ -0,0 +1,197 @@
1/*
2 * mm/shmem_acl.c
3 *
4 * (C) 2005 Andreas Gruenbacher <agruen@suse.de>
5 *
6 * This file is released under the GPL.
7 */
8
9#include <linux/fs.h>
10#include <linux/shmem_fs.h>
11#include <linux/xattr.h>
12#include <linux/generic_acl.h>
13
14/**
15 * shmem_get_acl - generic_acl_operations->getacl() operation
16 */
17static struct posix_acl *
18shmem_get_acl(struct inode *inode, int type)
19{
20 struct posix_acl *acl = NULL;
21
22 spin_lock(&inode->i_lock);
23 switch(type) {
24 case ACL_TYPE_ACCESS:
25 acl = posix_acl_dup(SHMEM_I(inode)->i_acl);
26 break;
27
28 case ACL_TYPE_DEFAULT:
29 acl = posix_acl_dup(SHMEM_I(inode)->i_default_acl);
30 break;
31 }
32 spin_unlock(&inode->i_lock);
33
34 return acl;
35}
36
37/**
38 * shmem_set_acl - generic_acl_operations->setacl() operation
39 */
40static void
41shmem_set_acl(struct inode *inode, int type, struct posix_acl *acl)
42{
43 struct posix_acl *free = NULL;
44
45 spin_lock(&inode->i_lock);
46 switch(type) {
47 case ACL_TYPE_ACCESS:
48 free = SHMEM_I(inode)->i_acl;
49 SHMEM_I(inode)->i_acl = posix_acl_dup(acl);
50 break;
51
52 case ACL_TYPE_DEFAULT:
53 free = SHMEM_I(inode)->i_default_acl;
54 SHMEM_I(inode)->i_default_acl = posix_acl_dup(acl);
55 break;
56 }
57 spin_unlock(&inode->i_lock);
58 posix_acl_release(free);
59}
60
61struct generic_acl_operations shmem_acl_ops = {
62 .getacl = shmem_get_acl,
63 .setacl = shmem_set_acl,
64};
65
66/**
67 * shmem_list_acl_access, shmem_get_acl_access, shmem_set_acl_access,
68 * shmem_xattr_acl_access_handler - plumbing code to implement the
69 * system.posix_acl_access xattr using the generic acl functions.
70 */
71
72static size_t
73shmem_list_acl_access(struct inode *inode, char *list, size_t list_size,
74 const char *name, size_t name_len)
75{
76 return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_ACCESS,
77 list, list_size);
78}
79
80static int
81shmem_get_acl_access(struct inode *inode, const char *name, void *buffer,
82 size_t size)
83{
84 if (strcmp(name, "") != 0)
85 return -EINVAL;
86 return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, buffer,
87 size);
88}
89
90static int
91shmem_set_acl_access(struct inode *inode, const char *name, const void *value,
92 size_t size, int flags)
93{
94 if (strcmp(name, "") != 0)
95 return -EINVAL;
96 return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_ACCESS, value,
97 size);
98}
99
100struct xattr_handler shmem_xattr_acl_access_handler = {
101 .prefix = POSIX_ACL_XATTR_ACCESS,
102 .list = shmem_list_acl_access,
103 .get = shmem_get_acl_access,
104 .set = shmem_set_acl_access,
105};
106
107/**
108 * shmem_list_acl_default, shmem_get_acl_default, shmem_set_acl_default,
109 * shmem_xattr_acl_default_handler - plumbing code to implement the
110 * system.posix_acl_default xattr using the generic acl functions.
111 */
112
113static size_t
114shmem_list_acl_default(struct inode *inode, char *list, size_t list_size,
115 const char *name, size_t name_len)
116{
117 return generic_acl_list(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT,
118 list, list_size);
119}
120
121static int
122shmem_get_acl_default(struct inode *inode, const char *name, void *buffer,
123 size_t size)
124{
125 if (strcmp(name, "") != 0)
126 return -EINVAL;
127 return generic_acl_get(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, buffer,
128 size);
129}
130
131static int
132shmem_set_acl_default(struct inode *inode, const char *name, const void *value,
133 size_t size, int flags)
134{
135 if (strcmp(name, "") != 0)
136 return -EINVAL;
137 return generic_acl_set(inode, &shmem_acl_ops, ACL_TYPE_DEFAULT, value,
138 size);
139}
140
141struct xattr_handler shmem_xattr_acl_default_handler = {
142 .prefix = POSIX_ACL_XATTR_DEFAULT,
143 .list = shmem_list_acl_default,
144 .get = shmem_get_acl_default,
145 .set = shmem_set_acl_default,
146};
147
148/**
149 * shmem_acl_init - Initialize the acl(s) of a new inode
150 */
151int
152shmem_acl_init(struct inode *inode, struct inode *dir)
153{
154 return generic_acl_init(inode, dir, &shmem_acl_ops);
155}
156
157/**
158 * shmem_acl_destroy_inode - destroy acls hanging off the in-memory inode
159 *
160 * This is done before destroying the actual inode.
161 */
162
163void
164shmem_acl_destroy_inode(struct inode *inode)
165{
166 if (SHMEM_I(inode)->i_acl)
167 posix_acl_release(SHMEM_I(inode)->i_acl);
168 SHMEM_I(inode)->i_acl = NULL;
169 if (SHMEM_I(inode)->i_default_acl)
170 posix_acl_release(SHMEM_I(inode)->i_default_acl);
171 SHMEM_I(inode)->i_default_acl = NULL;
172}
173
174/**
175 * shmem_check_acl - check_acl() callback for generic_permission()
176 */
177static int
178shmem_check_acl(struct inode *inode, int mask)
179{
180 struct posix_acl *acl = shmem_get_acl(inode, ACL_TYPE_ACCESS);
181
182 if (acl) {
183 int error = posix_acl_permission(inode, acl, mask);
184 posix_acl_release(acl);
185 return error;
186 }
187 return -EAGAIN;
188}
189
190/**
191 * shmem_permission - permission() inode operation
192 */
193int
194shmem_permission(struct inode *inode, int mask, struct nameidata *nd)
195{
196 return generic_permission(inode, mask, shmem_check_acl);
197}
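For context, a hedged sketch (not part of the patch): generic_permission() in this kernel takes the check_acl callback as its third argument and treats -EAGAIN from it as "no ACL found, fall back to the ordinary group mode bits", which is why shmem_check_acl() returns -EAGAIN when shmem_get_acl() comes back empty. Roughly, from a caller's point of view:

	/* 0 on success, -EACCES if neither the mode bits nor an ACL allow it */
	error = generic_permission(inode, MAY_READ, shmem_check_acl);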
diff --git a/mm/slab.c b/mm/slab.c
index 792bfe320a8b..3dbd6f4e7477 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1683,10 +1683,32 @@ static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
1683static void dump_line(char *data, int offset, int limit) 1683static void dump_line(char *data, int offset, int limit)
1684{ 1684{
1685 int i; 1685 int i;
1686 unsigned char error = 0;
1687 int bad_count = 0;
1688
1686 printk(KERN_ERR "%03x:", offset); 1689 printk(KERN_ERR "%03x:", offset);
1687 for (i = 0; i < limit; i++) 1690 for (i = 0; i < limit; i++) {
1691 if (data[offset + i] != POISON_FREE) {
1692 error = data[offset + i];
1693 bad_count++;
1694 }
1688 printk(" %02x", (unsigned char)data[offset + i]); 1695 printk(" %02x", (unsigned char)data[offset + i]);
1696 }
1689 printk("\n"); 1697 printk("\n");
1698
1699 if (bad_count == 1) {
1700 error ^= POISON_FREE;
1701 if (!(error & (error - 1))) {
1702 printk(KERN_ERR "Single bit error detected. Probably "
1703 "bad RAM.\n");
1704#ifdef CONFIG_X86
1705 printk(KERN_ERR "Run memtest86+ or a similar memory "
1706 "test tool.\n");
1707#else
1708 printk(KERN_ERR "Run a memory test tool.\n");
1709#endif
1710 }
1711 }
1690} 1712}
1691#endif 1713#endif
1692 1714
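A worked illustration of the single-bit test above (not in the patch): XOR against POISON_FREE (0x6b) isolates the flipped bits, and a power-of-two result means exactly one bit differs.

	/*
	 *   observed byte         0x6f  =  0110 1111
	 *   POISON_FREE           0x6b  =  0110 1011
	 *   error = 0x6f ^ 0x6b = 0x04  =  0000 0100   -> single bit set
	 *   0x04 & (0x04 - 1)   = 0x00                 -> "Single bit error detected"
	 *
	 * A two-bit flip, say 0x6e ^ 0x6b = 0x05, fails the test because
	 * 0x05 & 0x04 != 0.
	 */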
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f1f5ec783781..a15def63f28f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1723,13 +1723,14 @@ get_swap_info_struct(unsigned type)
1723 */ 1723 */
1724int valid_swaphandles(swp_entry_t entry, unsigned long *offset) 1724int valid_swaphandles(swp_entry_t entry, unsigned long *offset)
1725{ 1725{
1726 int ret = 0, i = 1 << page_cluster; 1726 int our_page_cluster = page_cluster;
1727 int ret = 0, i = 1 << our_page_cluster;
1727 unsigned long toff; 1728 unsigned long toff;
1728 struct swap_info_struct *swapdev = swp_type(entry) + swap_info; 1729 struct swap_info_struct *swapdev = swp_type(entry) + swap_info;
1729 1730
1730 if (!page_cluster) /* no readahead */ 1731 if (!our_page_cluster) /* no readahead */
1731 return 0; 1732 return 0;
1732 toff = (swp_offset(entry) >> page_cluster) << page_cluster; 1733 toff = (swp_offset(entry) >> our_page_cluster) << our_page_cluster;
1733 if (!toff) /* first page is swap header */ 1734 if (!toff) /* first page is swap header */
1734 toff++, i--; 1735 toff++, i--;
1735 *offset = toff; 1736 *offset = toff;
diff --git a/mm/truncate.c b/mm/truncate.c
index a654928323dc..f4edbc179d14 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -17,6 +17,32 @@
17 do_invalidatepage */ 17 do_invalidatepage */
18 18
19 19
20/**
21 * do_invalidatepage - invalidate part or all of a page
22 * @page: the page which is affected
23 * @offset: the index of the truncation point
24 *
25 * do_invalidatepage() is called when all or part of the page has become
26 * invalidated by a truncate operation.
27 *
28 * do_invalidatepage() does not have to release all buffers, but it must
29 * ensure that no dirty buffer is left outside @offset and that no I/O
30 * is underway against any of the blocks which are outside the truncation
31 * point, because the caller is about to free (and possibly reuse) those
32 * blocks on-disk.
33 */
34void do_invalidatepage(struct page *page, unsigned long offset)
35{
36 void (*invalidatepage)(struct page *, unsigned long);
37 invalidatepage = page->mapping->a_ops->invalidatepage;
38#ifdef CONFIG_BLOCK
39 if (!invalidatepage)
40 invalidatepage = block_invalidatepage;
41#endif
42 if (invalidatepage)
43 (*invalidatepage)(page, offset);
44}
45
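A hedged sketch of the call-site view (not part of the patch): a block-backed filesystem can simply leave ->invalidatepage unset and rely on the block_invalidatepage() fallback above, while one that attaches its own private data to pages supplies the callback explicitly; examplefs_invalidatepage is hypothetical.

	static void examplefs_invalidatepage(struct page *page, unsigned long offset)
	{
		/* drop any private state tracked for the truncated range */
	}

	static const struct address_space_operations examplefs_aops = {
		.invalidatepage	= examplefs_invalidatepage,
	};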
20static inline void truncate_partial_page(struct page *page, unsigned partial) 46static inline void truncate_partial_page(struct page *page, unsigned partial)
21{ 47{
22 memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); 48 memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial);
@@ -261,9 +287,39 @@ unsigned long invalidate_inode_pages(struct address_space *mapping)
261{ 287{
262 return invalidate_mapping_pages(mapping, 0, ~0UL); 288 return invalidate_mapping_pages(mapping, 0, ~0UL);
263} 289}
264
265EXPORT_SYMBOL(invalidate_inode_pages); 290EXPORT_SYMBOL(invalidate_inode_pages);
266 291
292/*
293 * This is like invalidate_complete_page(), except it ignores the page's
294 * refcount. We do this because invalidate_inode_pages2() needs stronger
295 * invalidation guarantees, and cannot afford to leave pages behind because
296 * shrink_list() has a temp ref on them, or because they're transiently sitting
297 * in the lru_cache_add() pagevecs.
298 */
299static int
300invalidate_complete_page2(struct address_space *mapping, struct page *page)
301{
302 if (page->mapping != mapping)
303 return 0;
304
305 if (PagePrivate(page) && !try_to_release_page(page, 0))
306 return 0;
307
308 write_lock_irq(&mapping->tree_lock);
309 if (PageDirty(page))
310 goto failed;
311
312 BUG_ON(PagePrivate(page));
313 __remove_from_page_cache(page);
314 write_unlock_irq(&mapping->tree_lock);
315 ClearPageUptodate(page);
316 page_cache_release(page); /* pagecache ref */
317 return 1;
318failed:
319 write_unlock_irq(&mapping->tree_lock);
320 return 0;
321}
322
267/** 323/**
268 * invalidate_inode_pages2_range - remove range of pages from an address_space 324 * invalidate_inode_pages2_range - remove range of pages from an address_space
269 * @mapping: the address_space 325 * @mapping: the address_space
@@ -330,7 +386,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
330 } 386 }
331 } 387 }
332 was_dirty = test_clear_page_dirty(page); 388 was_dirty = test_clear_page_dirty(page);
333 if (!invalidate_complete_page(mapping, page)) { 389 if (!invalidate_complete_page2(mapping, page)) {
334 if (was_dirty) 390 if (was_dirty)
335 set_page_dirty(page); 391 set_page_dirty(page);
336 ret = -EIO; 392 ret = -EIO;
diff --git a/mm/util.c b/mm/util.c
index 7368479220b3..e14fa84ef39a 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -40,6 +40,24 @@ char *kstrdup(const char *s, gfp_t gfp)
40} 40}
41EXPORT_SYMBOL(kstrdup); 41EXPORT_SYMBOL(kstrdup);
42 42
43/**
44 * kmemdup - duplicate region of memory
45 *
46 * @src: memory region to duplicate
47 * @len: memory region length
48 * @gfp: GFP mask to use
49 */
50void *kmemdup(const void *src, size_t len, gfp_t gfp)
51{
52 void *p;
53
54 p = ____kmalloc(len, gfp);
55 if (p)
56 memcpy(p, src, len);
57 return p;
58}
59EXPORT_SYMBOL(kmemdup);
60
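Typical caller pattern (illustrative only; struct foo and the surrounding function are hypothetical): kmemdup() replaces the open-coded kmalloc() + memcpy() pair.

	struct foo *dup;

	dup = kmemdup(src, sizeof(*src), GFP_KERNEL);	/* src is an existing struct foo * */
	if (!dup)
		return -ENOMEM;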
43/* 61/*
44 * strndup_user - duplicate an existing string from user space 62 * strndup_user - duplicate an existing string from user space
45 * 63 *