14 files changed, 143 insertions, 107 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index a0860640378d..71034f41a2ba 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -724,6 +724,14 @@ void bdi_destroy(struct backing_dev_info *bdi)
        bdi_unregister(bdi);
+        /*
+         * If bdi_unregister() had already been called earlier, the
+         * wakeup_timer could still be armed because bdi_prune_sb()
+         * can race with the bdi_wakeup_thread_delayed() calls from
+         * __mark_inode_dirty().
+         */
+        del_timer_sync(&bdi->wb.wakeup_timer);
        for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
                percpu_counter_destroy(&bdi->bdi_stat[i]);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4298abaae153..36b3d988b4ef 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2259,12 +2259,8 @@ static void khugepaged_do_scan(struct page **hpage)
 static void khugepaged_alloc_sleep(void)
 {
-        DEFINE_WAIT(wait);
+        wait_event_freezable_timeout(khugepaged_wait, false,
-        add_wait_queue(&khugepaged_wait, &wait);
+                        msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
-        schedule_timeout_interruptible(
-                msecs_to_jiffies(
-                        khugepaged_alloc_sleep_millisecs));
-        remove_wait_queue(&khugepaged_wait, &wait);
 }
 #ifndef CONFIG_NUMA
@@ -2313,14 +2309,10 @@ static void khugepaged_loop(void)
                if (unlikely(kthread_should_stop()))
                        break;
                if (khugepaged_has_work()) {
-                        DEFINE_WAIT(wait);
                        if (!khugepaged_scan_sleep_millisecs)
                                continue;
-                        add_wait_queue(&khugepaged_wait, &wait);
+                        wait_event_freezable_timeout(khugepaged_wait, false,
-                        schedule_timeout_interruptible(
+                            msecs_to_jiffies(khugepaged_scan_sleep_millisecs));
-                                msecs_to_jiffies(
-                                        khugepaged_scan_sleep_millisecs));
-                        remove_wait_queue(&khugepaged_wait, &wait);
                } else if (khugepaged_enabled())
                        wait_event_freezable(khugepaged_wait,
                                             khugepaged_wait_event());
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dae27ba3be2c..73f17c0293c0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -576,6 +576,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
        __SetPageHead(page);
        for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
                __SetPageTail(p);
+                set_page_count(p, 0);
                p->first_page = page;
        }
 }
@@ -2422,6 +2423,8 @@ retry_avoidcopy:
         * anon_vma prepared.
         */
        if (unlikely(anon_vma_prepare(vma))) {
+                page_cache_release(new_page);
+                page_cache_release(old_page);
                /* Caller expects lock to be held */
                spin_lock(&mm->page_table_lock);
                return VM_FAULT_OOM;
diff --git a/mm/migrate.c b/mm/migrate.c
index 578e29174fa6..177aca424a06 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -871,9 +871,9 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
        if (anon_vma)
                put_anon_vma(anon_vma);
-out:
        unlock_page(hpage);
+out:
        if (rc != -EAGAIN) {
                list_del(&hpage->lru);
                put_page(hpage);
diff --git a/mm/nommu.c b/mm/nommu.c
index 73419c55eda6..b982290fd962 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -454,7 +454,7 @@ void  __attribute__((weak)) vmalloc_sync_all(void)
 *      between processes, it syncs the pagetable across all
 *      processes.
 */
-struct vm_struct *alloc_vm_area(size_t size)
+struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
 {
        BUG();
        return NULL;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 471dedb463ab..76f2c5ae908e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -185,6 +185,11 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
        if (!p)
                return 0;
+        if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
+                task_unlock(p);
+                return 0;
+        }
        /*
         * The memory controller may have a limit of 0 bytes, so avoid a divide
         * by zero, if necessary.
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index a3278f005230..71252486bc6f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -128,7 +128,6 @@ unsigned long global_dirty_limit;
 *
 */
 static struct prop_descriptor vm_completions;
-static struct prop_descriptor vm_dirties;
 /*
 * couple the period to the dirty_ratio:
@@ -154,7 +153,6 @@ static void update_completion_period(void)
 {
        int shift = calc_period_shift();
        prop_change_shift(&vm_completions, shift);
-        prop_change_shift(&vm_dirties, shift);
        writeback_set_ratelimit();
 }
@@ -235,11 +233,6 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL_GPL(bdi_writeout_inc);
-void task_dirty_inc(struct task_struct *tsk)
-{
-        prop_inc_single(&vm_dirties, &tsk->dirties);
-}
 /*
 * Obtain an accurate fraction of the BDI's portion.
 */
@@ -1133,17 +1126,17 @@ pause:
                                          pages_dirtied,
                                          pause,
                                          start_time);
-                __set_current_state(TASK_UNINTERRUPTIBLE);
+                __set_current_state(TASK_KILLABLE);
                io_schedule_timeout(pause);
-                dirty_thresh = hard_dirty_limit(dirty_thresh);
                /*
-                 * max-pause area. If dirty exceeded but still within this
+                 * This is typically equal to (nr_dirty < dirty_thresh) and can
-                 * area, no need to sleep for more than 200ms: (a) 8 pages per
+                 * also keep "1000+ dd on a slow USB stick" under control.
-                 * 200ms is typically more than enough to curb heavy dirtiers;
-                 * (b) the pause time limit makes the dirtiers more responsive.
                 */
-                if (nr_dirty < dirty_thresh)
+                if (task_ratelimit)
+                        break;
+                if (fatal_signal_pending(current))
                        break;
        }
@@ -1395,7 +1388,6 @@ void __init page_writeback_init(void)
        shift = calc_period_shift();
        prop_descriptor_init(&vm_completions, shift);
-        prop_descriptor_init(&vm_dirties, shift);
 }
 /**
@@ -1724,7 +1716,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
                __inc_zone_page_state(page, NR_DIRTIED);
                __inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
                __inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
-                task_dirty_inc(current);
                task_io_account_write(PAGE_CACHE_SIZE);
        }
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9dd443d89d8b..2b8ba3aebf6e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -356,8 +356,8 @@ void prep_compound_page(struct page *page, unsigned long order)
        __SetPageHead(page);
        for (i = 1; i < nr_pages; i++) {
                struct page *p = page + i;
                __SetPageTail(p);
+                set_page_count(p, 0);
                p->first_page = page;
        }
 }
@@ -3377,9 +3377,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
        unsigned long block_migratetype;
        int reserve;
-        /* Get the start pfn, end pfn and the number of blocks to reserve */
+        /*
+         * Get the start pfn, end pfn and the number of blocks to reserve.
+         * We have to be careful to be aligned to pageblock_nr_pages to
+         * make sure that we always check pfn_valid for the first page in
+         * the block.
+         */
        start_pfn = zone->zone_start_pfn;
        end_pfn = start_pfn + zone->spanned_pages;
+        start_pfn = roundup(start_pfn, pageblock_nr_pages);
        reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
                                                        pageblock_order;
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index ea534960a04b..12a48a88c0d8 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -50,14 +50,13 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
        if (!pages || !bitmap) {
                if (may_alloc && !pages)
-                        pages = pcpu_mem_alloc(pages_size);
+                        pages = pcpu_mem_zalloc(pages_size);
                if (may_alloc && !bitmap)
-                        bitmap = pcpu_mem_alloc(bitmap_size);
+                        bitmap = pcpu_mem_zalloc(bitmap_size);
                if (!pages || !bitmap)
                        return NULL;
        }
-        memset(pages, 0, pages_size);
        bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);
        *bitmapp = bitmap;
@@ -143,8 +142,8 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
                                 int page_start, int page_end)
 {
        flush_cache_vunmap(
-                pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
+                pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
-                pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+                pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
@@ -206,8 +205,8 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
                                      int page_start, int page_end)
 {
        flush_tlb_kernel_range(
-                pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
+                pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
-                pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+                pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 static int __pcpu_map_pages(unsigned long addr, struct page **pages,
@@ -284,8 +283,8 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
                                int page_start, int page_end)
 {
        flush_cache_vmap(
-                pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
+                pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
-                pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+                pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 /**
diff --git a/mm/percpu.c b/mm/percpu.c
index bf80e55dbed7..3bb810a72006 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -116,9 +116,9 @@ static int pcpu_atom_size __read_mostly;
 static int pcpu_nr_slots __read_mostly;
 static size_t pcpu_chunk_struct_size __read_mostly;
-/* cpus with the lowest and highest unit numbers */
+/* cpus with the lowest and highest unit addresses */
-static unsigned int pcpu_first_unit_cpu __read_mostly;
+static unsigned int pcpu_low_unit_cpu __read_mostly;
-static unsigned int pcpu_last_unit_cpu __read_mostly;
+static unsigned int pcpu_high_unit_cpu __read_mostly;
 /* the address of the first chunk which starts with the kernel static area */
 void *pcpu_base_addr __read_mostly;
@@ -273,11 +273,11 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
             (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))
 /**
- * pcpu_mem_alloc - allocate memory
+ * pcpu_mem_zalloc - allocate memory
 * @size: bytes to allocate
 *
 * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vmalloc() is used.  The returned
+ * kzalloc() is used; otherwise, vzalloc() is used.  The returned
 * memory is always zeroed.
 *
 * CONTEXT:
@@ -286,7 +286,7 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
 * RETURNS:
 * Pointer to the allocated area on success, NULL on failure.
 */
-static void *pcpu_mem_alloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size)
 {
        if (WARN_ON_ONCE(!slab_is_available()))
                return NULL;
@@ -302,7 +302,7 @@ static void *pcpu_mem_alloc(size_t size)
 * @ptr: memory to free
 * @size: size of the area
 *
- * Free @ptr.  @ptr should have been allocated using pcpu_mem_alloc().
+ * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
 */
 static void pcpu_mem_free(void *ptr, size_t size)
 {
@@ -384,7 +384,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
        size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
        unsigned long flags;
-        new = pcpu_mem_alloc(new_size);
+        new = pcpu_mem_zalloc(new_size);
        if (!new)
                return -ENOMEM;
@@ -604,11 +604,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 {
        struct pcpu_chunk *chunk;
-        chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
+        chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
        if (!chunk)
                return NULL;
-        chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0]));
+        chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
+                                                sizeof(chunk->map[0]));
        if (!chunk->map) {
                kfree(chunk);
                return NULL;
@@ -977,6 +978,17 @@ bool is_kernel_percpu_address(unsigned long addr)
 * address.  The caller is responsible for ensuring @addr stays valid
 * until this function finishes.
 *
+ * percpu allocator has special setup for the first chunk, which currently
+ * supports either embedding in linear address space or vmalloc mapping,
+ * and, from the second one, the backing allocator (currently either vm or
+ * km) provides translation.
+ *
+ * The addr can be translated simply without checking if it falls into the
+ * first chunk. But the current code reflects better how percpu allocator
+ * actually works, and the verification can discover both bugs in percpu
+ * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
+ * code.
+ *
 * RETURNS:
 * The physical address for @addr.
 */
@@ -984,19 +996,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 {
        void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
        bool in_first_chunk = false;
-        unsigned long first_start, first_end;
+        unsigned long first_low, first_high;
        unsigned int cpu;
        /*
-         * The following test on first_start/end isn't strictly
+         * The following test on first_low/high isn't strictly
         * necessary but will speed up lookups of addresses which
         * aren't in the first chunk.
         */
-        first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0);
+        first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
-        first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu,
+        first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
-                                    pcpu_unit_pages);
+                                     pcpu_unit_pages);
-        if ((unsigned long)addr >= first_start &&
+        if ((unsigned long)addr >= first_low &&
-            (unsigned long)addr < first_end) {
+            (unsigned long)addr < first_high) {
                for_each_possible_cpu(cpu) {
                        void *start = per_cpu_ptr(base, cpu);
@@ -1233,7 +1245,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
        for (cpu = 0; cpu < nr_cpu_ids; cpu++)
                unit_map[cpu] = UINT_MAX;
-        pcpu_first_unit_cpu = NR_CPUS;
+        pcpu_low_unit_cpu = NR_CPUS;
+        pcpu_high_unit_cpu = NR_CPUS;
        for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
                const struct pcpu_group_info *gi = &ai->groups[group];
@@ -1253,9 +1267,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
                        unit_map[cpu] = unit + i;
                        unit_off[cpu] = gi->base_offset + i * ai->unit_size;
-                        if (pcpu_first_unit_cpu == NR_CPUS)
+                        /* determine low/high unit_cpu */
-                                pcpu_first_unit_cpu = cpu;
+                        if (pcpu_low_unit_cpu == NR_CPUS ||
-                        pcpu_last_unit_cpu = cpu;
+                            unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
+                                pcpu_low_unit_cpu = cpu;
+                        if (pcpu_high_unit_cpu == NR_CPUS ||
+                            unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
+                                pcpu_high_unit_cpu = cpu;
                }
        }
        pcpu_nr_units = unit;
@@ -1889,7 +1907,7 @@ void __init percpu_init_late(void)
                BUILD_BUG_ON(size > PAGE_SIZE);
-                map = pcpu_mem_alloc(size);
+                map = pcpu_mem_zalloc(size);
                BUG_ON(!map);
                spin_lock_irqsave(&pcpu_lock, flags);
diff --git a/mm/slab.c b/mm/slab.c
index 708efe886154..83311c9aaf9d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -595,6 +595,7 @@ static enum {
        PARTIAL_AC,
        PARTIAL_L3,
        EARLY,
+        LATE,
        FULL
 } g_cpucache_up;
@@ -671,7 +672,7 @@ static void init_node_lock_keys(int q)
 {
        struct cache_sizes *s = malloc_sizes;
-        if (g_cpucache_up != FULL)
+        if (g_cpucache_up < LATE)
                return;
        for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
@@ -1666,6 +1667,8 @@ void __init kmem_cache_init_late(void)
 {
        struct kmem_cache *cachep;
+        g_cpucache_up = LATE;
        /* Annotate slab for lockdep -- annotate the malloc caches */
        init_lock_keys();
diff --git a/mm/slub.c b/mm/slub.c
index 7d2a996c307e..ed3334d9b6da 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1862,7 +1862,7 @@ static void unfreeze_partials(struct kmem_cache *s)
 {
        struct kmem_cache_node *n = NULL;
        struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
-        struct page *page;
+        struct page *page, *discard_page = NULL;
        while ((page = c->partial)) {
                enum slab_modes { M_PARTIAL, M_FREE };
@@ -1904,7 +1904,8 @@ static void unfreeze_partials(struct kmem_cache *s)
                                if (l == M_PARTIAL)
                                        remove_partial(n, page);
                                else
-                                        add_partial(n, page, 1);
+                                        add_partial(n, page,
+                                                DEACTIVATE_TO_TAIL);
                                l = m;
                        }
@@ -1915,14 +1916,22 @@ static void unfreeze_partials(struct kmem_cache *s)
                                "unfreezing slab"));
                if (m == M_FREE) {
-                        stat(s, DEACTIVATE_EMPTY);
+                        page->next = discard_page;
-                        discard_slab(s, page);
+                        discard_page = page;
-                        stat(s, FREE_SLAB);
                }
        }
        if (n)
                spin_unlock(&n->list_lock);
+        while (discard_page) {
+                page = discard_page;
+                discard_page = discard_page->next;
+                stat(s, DEACTIVATE_EMPTY);
+                discard_slab(s, page);
+                stat(s, FREE_SLAB);
+        }
 }
 /*
@@ -1969,7 +1978,7 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
                page->pobjects = pobjects;
                page->next = oldpage;
-        } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
+        } while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
        stat(s, CPU_PARTIAL_FREE);
        return pobjects;
 }
@@ -4435,30 +4444,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
                for_each_possible_cpu(cpu) {
                        struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+                        int node = ACCESS_ONCE(c->node);
                        struct page *page;
-                        if (!c || c->node < 0)
+                        if (node < 0)
                                continue;
+                        page = ACCESS_ONCE(c->page);
-                        if (c->page) {
+                        if (page) {
-                                        if (flags & SO_TOTAL)
+                                if (flags & SO_TOTAL)
-                                                x = c->page->objects;
+                                        x = page->objects;
                                else if (flags & SO_OBJECTS)
-                                        x = c->page->inuse;
+                                        x = page->inuse;
                                else
                                        x = 1;
                                total += x;
-                                nodes[c->node] += x;
+                                nodes[node] += x;
                        }
                        page = c->partial;
                        if (page) {
                                x = page->pobjects;
-                                total += x;
+                                total += x;
-                                nodes[c->node] += x;
+                                nodes[node] += x;
                        }
-                        per_cpu[c->node]++;
+                        per_cpu[node]++;
                }
        }
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b669aa6f6caf..1d8b32f07139 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1633,6 +1633,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
                goto fail;
        addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
+        if (!addr)
+                return NULL;
        /*
         * In this function, newly allocated vm_struct is not added
@@ -2141,23 +2143,30 @@ void  __attribute__((weak)) vmalloc_sync_all(void)
 static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
 {
-        /* apply_to_page_range() does all the hard work. */
+        pte_t ***p = data;
+        if (p) {
+                *(*p) = pte;
+                (*p)++;
+        }
        return 0;
 }
 /**
 *      alloc_vm_area - allocate a range of kernel address space
 *      @size:          size of the area
+ *      @ptes:          returns the PTEs for the address space
 *
 *      Returns:        NULL on failure, vm_struct on success
 *
 *      This function reserves a range of kernel address space, and
 *      allocates pagetables to map that range.  No actual mappings
- *      are created.  If the kernel address space is not shared
+ *      are created.
- *      between processes, it syncs the pagetable across all
+ *
- *      processes.
+ *      If @ptes is non-NULL, pointers to the PTEs (in init_mm)
+ *      allocated for the VM area are returned.
 */
-struct vm_struct *alloc_vm_area(size_t size)
+struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
 {
        struct vm_struct *area;
@@ -2171,19 +2180,11 @@ struct vm_struct *alloc_vm_area(size_t size)
         * of kernel virtual address space and mapped into init_mm.
         */
        if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
-                                area->size, f, NULL)) {
+                                size, f, ptes ? &ptes : NULL)) {
                free_vm_area(area);
                return NULL;
        }
-        /*
-         * If the allocated address space is passed to a hypercall
-         * before being used then we cannot rely on a page fault to
-         * trigger an update of the page tables.  So sync all the page
-         * tables here.
-         */
-        vmalloc_sync_all();
        return area;
 }
 EXPORT_SYMBOL_GPL(alloc_vm_area);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1893c050795..f54a05b7a61d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -183,7 +183,7 @@ static unsigned long zone_nr_lru_pages(struct zone *zone,
 */
 void register_shrinker(struct shrinker *shrinker)
 {
-        shrinker->nr = 0;
+        atomic_long_set(&shrinker->nr_in_batch, 0);
        down_write(&shrinker_rwsem);
        list_add_tail(&shrinker->list, &shrinker_list);
        up_write(&shrinker_rwsem);
@@ -247,25 +247,26 @@ unsigned long shrink_slab(struct shrink_control *shrink,
        list_for_each_entry(shrinker, &shrinker_list, list) {
                unsigned long long delta;
-                unsigned long total_scan;
+                long total_scan;
-                unsigned long max_pass;
+                long max_pass;
                int shrink_ret = 0;
                long nr;
                long new_nr;
                long batch_size = shrinker->batch ? shrinker->batch
                                                  : SHRINK_BATCH;
+                max_pass = do_shrinker_shrink(shrinker, shrink, 0);
+                if (max_pass <= 0)
+                        continue;
                /*
                 * copy the current shrinker scan count into a local variable
                 * and zero it so that other concurrent shrinker invocations
                 * don't also do this scanning work.
                 */
-                do {
+                nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
-                        nr = shrinker->nr;
-                } while (cmpxchg(&shrinker->nr, nr, 0) != nr);
                total_scan = nr;
-                max_pass = do_shrinker_shrink(shrinker, shrink, 0);
                delta = (4 * nr_pages_scanned) / shrinker->seeks;
                delta *= max_pass;
                do_div(delta, lru_pages + 1);
@@ -325,12 +326,11 @@ unsigned long shrink_slab(struct shrink_control *shrink,
                 * manner that handles concurrent updates. If we exhausted the
                 * scan, there is no need to do an update.
                 */
-                do {
+                if (total_scan > 0)
-                        nr = shrinker->nr;
+                        new_nr = atomic_long_add_return(total_scan,
-                        new_nr = total_scan + nr;
+                                        &shrinker->nr_in_batch);
-                        if (total_scan <= 0)
+                else
-                                break;
+                        new_nr = atomic_long_read(&shrinker->nr_in_batch);
-                } while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
                trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
        }