author     Rafael J. Wysocki <rjw@sisk.pl>	2011-12-21 15:59:45 -0500
committer  Rafael J. Wysocki <rjw@sisk.pl>	2011-12-21 15:59:45 -0500
commit     b00f4dc5ff022cb9cbaffd376d9454d7fa1e496f
tree       40f1b232e2f1e8ac365317a14fdcbcb331722b46 /mm
parent     1eac8111e0763853266a171ce11214da3a347a0a
parent     b9e26dfdad5a4f9cbdaacafac6998614cc9c41bc
Merge branch 'master' into pm-sleep
* master: (848 commits)
SELinux: Fix RCU deref check warning in sel_netport_insert()
binary_sysctl(): fix memory leak
mm/vmalloc.c: remove static declaration of va from __get_vm_area_node
ipmi_watchdog: restore settings when BMC reset
oom: fix integer overflow of points in oom_badness
memcg: keep root group unchanged if creation fails
nilfs2: potential integer overflow in nilfs_ioctl_clean_segments()
nilfs2: unbreak compat ioctl
cpusets: stall when updating mems_allowed for mempolicy or disjoint nodemask
evm: prevent racing during tfm allocation
evm: key must be set once during initialization
mmc: vub300: fix type of firmware_rom_wait_states module parameter
Revert "mmc: enable runtime PM by default"
mmc: sdhci: remove "state" argument from sdhci_suspend_host
x86, dumpstack: Fix code bytes breakage due to missing KERN_CONT
IB/qib: Correct sense on freectxts increment and decrement
RDMA/cma: Verify private data length
cgroups: fix a css_set not found bug in cgroup_attach_proc
oprofile: Fix uninitialized memory access when writing to oprofilefs
Revert "xen/pv-on-hvm kexec: add xs_reset_watches to shutdown watches from old kernel"
...
Conflicts:
kernel/cgroup_freezer.c
Diffstat (limited to 'mm')

 mm/filemap.c        |  6
 mm/huge_memory.c    | 16
 mm/hugetlb.c        |  1
 mm/memcontrol.c     |  3
 mm/migrate.c        |  2
 mm/oom_kill.c       |  2
 mm/page-writeback.c | 32
 mm/page_alloc.c     | 10
 mm/percpu-vm.c      | 17
 mm/percpu.c         | 68
 mm/slab.c           |  5
 mm/slub.c           | 42
 mm/vmalloc.c        |  4
 mm/vmscan.c         | 26
 14 files changed, 146 insertions(+), 88 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index c0018f2d50e0..c106d3b3cc64 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2407,7 +2407,6 @@ static ssize_t generic_perform_write(struct file *file,
						iov_iter_count(i));
 
 again:
-
		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
@@ -2463,7 +2462,10 @@ again:
		written += copied;
 
		balance_dirty_pages_ratelimited(mapping);
-
+		if (fatal_signal_pending(current)) {
+			status = -EINTR;
+			break;
+		}
	} while (iov_iter_count(i));
 
	return written ? written : status;
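
The filemap.c change makes the buffered-write loop honour SIGKILL: a fatally-signalled writer now stops generating dirty pages instead of looping (and being throttled) until the whole iovec is consumed. A minimal sketch of the resulting control flow, using the identifiers from the hunk above:

	do {
		/* ... copy up to one page from the iovec, update written ... */
		balance_dirty_pages_ratelimited(mapping);
		if (fatal_signal_pending(current)) {
			status = -EINTR;	/* killed: stop dirtying */
			break;
		}
	} while (iov_iter_count(i));
	return written ? written : status;	/* partial writes still count */

Note the return convention: bytes already written are reported even when the loop is cut short, so only a write that made no progress at all fails with -EINTR.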
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4298abaae153..36b3d988b4ef 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2259,12 +2259,8 @@ static void khugepaged_do_scan(struct page **hpage)
 
 static void khugepaged_alloc_sleep(void)
 {
-	DEFINE_WAIT(wait);
-	add_wait_queue(&khugepaged_wait, &wait);
-	schedule_timeout_interruptible(
-		msecs_to_jiffies(
-			khugepaged_alloc_sleep_millisecs));
-	remove_wait_queue(&khugepaged_wait, &wait);
+	wait_event_freezable_timeout(khugepaged_wait, false,
+			msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
 }
 
 #ifndef CONFIG_NUMA
@@ -2313,14 +2309,10 @@ static void khugepaged_loop(void)
		if (unlikely(kthread_should_stop()))
			break;
		if (khugepaged_has_work()) {
-			DEFINE_WAIT(wait);
			if (!khugepaged_scan_sleep_millisecs)
				continue;
-			add_wait_queue(&khugepaged_wait, &wait);
-			schedule_timeout_interruptible(
-				msecs_to_jiffies(
-					khugepaged_scan_sleep_millisecs));
-			remove_wait_queue(&khugepaged_wait, &wait);
+			wait_event_freezable_timeout(khugepaged_wait, false,
+			    msecs_to_jiffies(khugepaged_scan_sleep_millisecs));
		} else if (khugepaged_enabled())
			wait_event_freezable(khugepaged_wait,
					     khugepaged_wait_event());
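
Both khugepaged hunks collapse an open-coded add_wait_queue()/schedule_timeout_interruptible()/remove_wait_queue() sequence into wait_event_freezable_timeout(). Beyond the line count, the helper enters the freezer, which is presumably why this matters to the pm-sleep branch being merged into: khugepaged can now be frozen for suspend while it sleeps. A side-by-side sketch (kernel-style, not standalone; ms stands in for the two *_millisecs tunables):

	/* before: plain interruptible sleep, invisible to the freezer */
	DEFINE_WAIT(wait);
	add_wait_queue(&khugepaged_wait, &wait);
	schedule_timeout_interruptible(msecs_to_jiffies(ms));
	remove_wait_queue(&khugepaged_wait, &wait);

	/* after: a pure timeout ("false" condition) that is freezable */
	wait_event_freezable_timeout(khugepaged_wait, false,
				     msecs_to_jiffies(ms));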
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bb28a5f9db8d..73f17c0293c0 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -576,6 +576,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
	__SetPageHead(page);
	for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
		__SetPageTail(p);
+		set_page_count(p, 0);
		p->first_page = page;
	}
 }
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6aff93c98aca..b63f5f7dfa07 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4907,9 +4907,9 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
		int cpu;
		enable_swap_cgroup();
		parent = NULL;
-		root_mem_cgroup = memcg;
		if (mem_cgroup_soft_limit_tree_init())
			goto free_out;
+		root_mem_cgroup = memcg;
		for_each_possible_cpu(cpu) {
			struct memcg_stock_pcp *stock =
				&per_cpu(memcg_stock, cpu);
@@ -4948,7 +4948,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
	return &memcg->css;
 free_out:
	__mem_cgroup_free(memcg);
-	root_mem_cgroup = NULL;
	return ERR_PTR(error);
 }
 
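
The memcontrol.c change is a publish-after-init reordering: root_mem_cgroup is assigned only once mem_cgroup_soft_limit_tree_init() has succeeded, so the free_out error path no longer has to unpublish a half-created root group. Reduced to its essentials:

	if (mem_cgroup_soft_limit_tree_init())
		goto free_out;			/* global never set */
	root_mem_cgroup = memcg;		/* publish last */
	/* ... per-cpu stock setup ... */
 free_out:
	__mem_cgroup_free(memcg);		/* no root_mem_cgroup = NULL needed */
	return ERR_PTR(error);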
diff --git a/mm/migrate.c b/mm/migrate.c
index 578e29174fa6..177aca424a06 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -871,9 +871,9 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 
	if (anon_vma)
		put_anon_vma(anon_vma);
-out:
	unlock_page(hpage);
 
+out:
	if (rc != -EAGAIN) {
		list_del(&hpage->lru);
		put_page(hpage);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 3134ee2fb2e8..eeb27e27dce3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -176,7 +176,7 @@ static bool oom_unkillable_task(struct task_struct *p,
 unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
		      const nodemask_t *nodemask, unsigned long totalpages)
 {
-	int points;
+	long points;
 
	if (oom_unkillable_task(p, mem, nodemask))
		return 0;
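
The oom_kill.c one-liner widens points from int to long: the badness score is computed in pages, and the intermediate sums can exceed INT_MAX on large-memory machines before the value is normalised. A toy userspace illustration of the truncation (illustrative numbers only, assuming an LP64 target where long is 64-bit):

	#include <stdio.h>

	int main(void)
	{
		long totalpages = 1L << 31;		/* 8 TiB of 4 KiB pages */
		long score = totalpages + totalpages / 2;	/* > INT_MAX */
		int as_int = (int)score;		/* what "int points" held */

		printf("long: %ld  int: %d\n", score, as_int);
		return 0;				/* the int wraps negative */
	}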
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 71252486bc6f..50f08241f981 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -411,8 +411,13 @@ void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty)
  *
  * Returns @bdi's dirty limit in pages. The term "dirty" in the context of
  * dirty balancing includes all PG_dirty, PG_writeback and NFS unstable pages.
- * And the "limit" in the name is not seriously taken as hard limit in
- * balance_dirty_pages().
+ *
+ * Note that balance_dirty_pages() will only seriously take it as a hard limit
+ * when sleeping max_pause per page is not enough to keep the dirty pages under
+ * control. For example, when the device is completely stalled due to some error
+ * conditions, or when there are 1000 dd tasks writing to a slow 10MB/s USB key.
+ * In the other normal situations, it acts more gently by throttling the tasks
+ * more (rather than completely block them) when the bdi dirty pages go high.
  *
  * It allocates high/low dirty limits to fast/slow devices, in order to prevent
  * - starving fast devices
@@ -594,6 +599,13 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
	 */
	if (unlikely(bdi_thresh > thresh))
		bdi_thresh = thresh;
+	/*
+	 * It's very possible that bdi_thresh is close to 0 not because the
+	 * device is slow, but that it has remained inactive for long time.
+	 * Honour such devices a reasonable good (hopefully IO efficient)
+	 * threshold, so that the occasional writes won't be blocked and active
+	 * writes can rampup the threshold quickly.
+	 */
	bdi_thresh = max(bdi_thresh, (limit - dirty) / 8);
	/*
	 * scale global setpoint to bdi's:
@@ -977,8 +989,7 @@ static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
	 *
	 * 8 serves as the safety ratio.
	 */
-	if (bdi_dirty)
-		t = min(t, bdi_dirty * HZ / (8 * bw + 1));
+	t = min(t, bdi_dirty * HZ / (8 * bw + 1));
 
	/*
	 * The pause time will be settled within range (max_pause/4, max_pause).
@@ -1136,6 +1147,19 @@ pause:
		if (task_ratelimit)
			break;
 
+		/*
+		 * In the case of an unresponding NFS server and the NFS dirty
+		 * pages exceeds dirty_thresh, give the other good bdi's a pipe
+		 * to go through, so that tasks on them still remain responsive.
+		 *
+		 * In theory 1 page is enough to keep the comsumer-producer
+		 * pipe going: the flusher cleans 1 page => the task dirties 1
+		 * more page. However bdi_dirty has accounting errors.  So use
+		 * the larger and more IO friendly bdi_stat_error.
+		 */
+		if (bdi_dirty <= bdi_stat_error(bdi))
+			break;
+
		if (fatal_signal_pending(current))
			break;
	}
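
For scale on the bdi_max_pause() clamp above: it bounds a single sleep so that at most roughly 1/8 of the bdi's dirty pages could be written back during the pause (the 8 being the safety ratio the comment mentions). A back-of-envelope calculation in plain C, with illustrative numbers only:

	#include <stdio.h>

	int main(void)
	{
		long hz = 100;		/* jiffies per second */
		long bw = 2560;		/* ~10 MB/s as 4 KiB pages per second */
		long bdi_dirty = 512;	/* dirty pages on this device */
		long t = 200;		/* cap computed earlier, in jiffies */
		long clamp = bdi_dirty * hz / (8 * bw + 1);

		if (clamp < t)		/* t = min(t, bdi_dirty*HZ/(8*bw+1)) */
			t = clamp;
		printf("max pause: %ld jiffies (~%ld ms)\n", t, t * 1000 / hz);
		return 0;		/* ~2 jiffies here */
	}

Dropping the old if (bdi_dirty) guard means a bdi with no dirty pages now simply yields a zero cap from this term rather than skipping the clamp.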
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9dd443d89d8b..2b8ba3aebf6e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -356,8 +356,8 @@ void prep_compound_page(struct page *page, unsigned long order)
	__SetPageHead(page);
	for (i = 1; i < nr_pages; i++) {
		struct page *p = page + i;
-
		__SetPageTail(p);
+		set_page_count(p, 0);
		p->first_page = page;
	}
 }
@@ -3377,9 +3377,15 @@ static void setup_zone_migrate_reserve(struct zone *zone)
	unsigned long block_migratetype;
	int reserve;
 
-	/* Get the start pfn, end pfn and the number of blocks to reserve */
+	/*
+	 * Get the start pfn, end pfn and the number of blocks to reserve
+	 * We have to be careful to be aligned to pageblock_nr_pages to
+	 * make sure that we always check pfn_valid for the first page in
+	 * the block.
+	 */
	start_pfn = zone->zone_start_pfn;
	end_pfn = start_pfn + zone->spanned_pages;
+	start_pfn = roundup(start_pfn, pageblock_nr_pages);
	reserve = roundup(min_wmark_pages(zone), pageblock_nr_pages) >>
							pageblock_order;
 
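
The set_page_count(p, 0) additions in hugetlb.c and page_alloc.c establish one invariant for gigantic and regular compound pages alike: only the head page carries the allocation's reference count, while every tail page has a count of zero and a first_page back-pointer to the head. A hypothetical debug helper (not part of the patch, written against the 2011-era struct page, and valid for regular compound pages where the mem_map is contiguous; a gigantic-page variant would walk with mem_map_next() as the hugetlb hunk does):

	static void check_compound_invariant(struct page *head, unsigned long order)
	{
		unsigned long i;

		VM_BUG_ON(!PageHead(head));
		for (i = 1; i < (1UL << order); i++) {
			struct page *p = head + i;

			VM_BUG_ON(!PageTail(p));
			VM_BUG_ON(page_count(p) != 0);	/* the new set_page_count() */
			VM_BUG_ON(p->first_page != head);
		}
	}

The second page_alloc.c hunk is independent: it rounds start_pfn up to a pageblock boundary so that setup_zone_migrate_reserve() always runs its pfn_valid() check against the first page of each block.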
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index ea534960a04b..12a48a88c0d8 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -50,14 +50,13 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
 
	if (!pages || !bitmap) {
		if (may_alloc && !pages)
-			pages = pcpu_mem_alloc(pages_size);
+			pages = pcpu_mem_zalloc(pages_size);
		if (may_alloc && !bitmap)
-			bitmap = pcpu_mem_alloc(bitmap_size);
+			bitmap = pcpu_mem_zalloc(bitmap_size);
		if (!pages || !bitmap)
			return NULL;
	}
 
-	memset(pages, 0, pages_size);
	bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);
 
	*bitmapp = bitmap;
@@ -143,8 +142,8 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
				 int page_start, int page_end)
 {
	flush_cache_vunmap(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
@@ -206,8 +205,8 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
				      int page_start, int page_end)
 {
	flush_tlb_kernel_range(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 static int __pcpu_map_pages(unsigned long addr, struct page **pages,
@@ -284,8 +283,8 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
				int page_start, int page_end)
 {
	flush_cache_vmap(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 /**
diff --git a/mm/percpu.c b/mm/percpu.c
index bf80e55dbed7..716eb4acf2fc 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -116,9 +116,9 @@ static int pcpu_atom_size __read_mostly;
 static int pcpu_nr_slots __read_mostly;
 static size_t pcpu_chunk_struct_size __read_mostly;
 
-/* cpus with the lowest and highest unit numbers */
-static unsigned int pcpu_first_unit_cpu __read_mostly;
-static unsigned int pcpu_last_unit_cpu __read_mostly;
+/* cpus with the lowest and highest unit addresses */
+static unsigned int pcpu_low_unit_cpu __read_mostly;
+static unsigned int pcpu_high_unit_cpu __read_mostly;
 
 /* the address of the first chunk which starts with the kernel static area */
 void *pcpu_base_addr __read_mostly;
@@ -273,11 +273,11 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
	     (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))
 
 /**
- * pcpu_mem_alloc - allocate memory
+ * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
  *
  * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vmalloc() is used.  The returned
+ * kzalloc() is used; otherwise, vzalloc() is used.  The returned
  * memory is always zeroed.
  *
  * CONTEXT:
@@ -286,7 +286,7 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_alloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size)
 {
	if (WARN_ON_ONCE(!slab_is_available()))
		return NULL;
@@ -302,7 +302,7 @@ static void *pcpu_mem_alloc(size_t size)
  * @ptr: memory to free
  * @size: size of the area
  *
- * Free @ptr.  @ptr should have been allocated using pcpu_mem_alloc().
+ * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
  */
 static void pcpu_mem_free(void *ptr, size_t size)
 {
@@ -384,7 +384,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
	unsigned long flags;
 
-	new = pcpu_mem_alloc(new_size);
+	new = pcpu_mem_zalloc(new_size);
	if (!new)
		return -ENOMEM;
 
@@ -604,11 +604,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 {
	struct pcpu_chunk *chunk;
 
-	chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
+	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
	if (!chunk)
		return NULL;
 
-	chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0]));
+	chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
+						sizeof(chunk->map[0]));
	if (!chunk->map) {
		kfree(chunk);
		return NULL;
@@ -977,6 +978,17 @@ bool is_kernel_percpu_address(unsigned long addr)
  * address.  The caller is responsible for ensuring @addr stays valid
  * until this function finishes.
  *
+ * percpu allocator has special setup for the first chunk, which currently
+ * supports either embedding in linear address space or vmalloc mapping,
+ * and, from the second one, the backing allocator (currently either vm or
+ * km) provides translation.
+ *
+ * The addr can be tranlated simply without checking if it falls into the
+ * first chunk. But the current code reflects better how percpu allocator
+ * actually works, and the verification can discover both bugs in percpu
+ * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
+ * code.
+ *
  * RETURNS:
  * The physical address for @addr.
  */
@@ -984,19 +996,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 {
	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
	bool in_first_chunk = false;
-	unsigned long first_start, first_end;
+	unsigned long first_low, first_high;
	unsigned int cpu;
 
	/*
-	 * The following test on first_start/end isn't strictly
+	 * The following test on unit_low/high isn't strictly
	 * necessary but will speed up lookups of addresses which
	 * aren't in the first chunk.
	 */
-	first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0);
-	first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu,
-				    pcpu_unit_pages);
-	if ((unsigned long)addr >= first_start &&
-	    (unsigned long)addr < first_end) {
+	first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
+	first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
+				     pcpu_unit_pages);
+	if ((unsigned long)addr >= first_low &&
+	    (unsigned long)addr < first_high) {
		for_each_possible_cpu(cpu) {
			void *start = per_cpu_ptr(base, cpu);
 
@@ -1011,9 +1023,11 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
		if (!is_vmalloc_addr(addr))
			return __pa(addr);
		else
-			return page_to_phys(vmalloc_to_page(addr));
+			return page_to_phys(vmalloc_to_page(addr)) +
+			       offset_in_page(addr);
	} else
-		return page_to_phys(pcpu_addr_to_page(addr));
+		return page_to_phys(pcpu_addr_to_page(addr)) +
+		       offset_in_page(addr);
 }
 
 /**
@@ -1233,7 +1247,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
		unit_map[cpu] = UINT_MAX;
-	pcpu_first_unit_cpu = NR_CPUS;
+
+	pcpu_low_unit_cpu = NR_CPUS;
+	pcpu_high_unit_cpu = NR_CPUS;
 
	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
		const struct pcpu_group_info *gi = &ai->groups[group];
@@ -1253,9 +1269,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
			unit_map[cpu] = unit + i;
			unit_off[cpu] = gi->base_offset + i * ai->unit_size;
 
-			if (pcpu_first_unit_cpu == NR_CPUS)
-				pcpu_first_unit_cpu = cpu;
-			pcpu_last_unit_cpu = cpu;
+			/* determine low/high unit_cpu */
+			if (pcpu_low_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
+				pcpu_low_unit_cpu = cpu;
+			if (pcpu_high_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
+				pcpu_high_unit_cpu = cpu;
		}
	}
	pcpu_nr_units = unit;
@@ -1889,7 +1909,7 @@ void __init percpu_init_late(void)
 
		BUILD_BUG_ON(size > PAGE_SIZE);
 
-		map = pcpu_mem_alloc(size);
+		map = pcpu_mem_zalloc(size);
		BUG_ON(!map);
 
		spin_lock_irqsave(&pcpu_lock, flags);
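
Two behavioural fixes hide among the pcpu_mem_zalloc() renames in percpu.c: per_cpu_ptr_to_phys() used to return the physical address of the backing page, silently dropping the offset within that page, and the first-chunk fast-path bounds are now the lowest/highest unit addresses rather than the first/last unit numbers (the two can differ once groups have different base offsets). The offset bug in miniature, as plain C with toy addresses:

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SIZE 4096UL

	int main(void)
	{
		uintptr_t addr = 0xffff880012345678UL;	/* some percpu address */
		uintptr_t page_phys = 0x12345000UL;	/* phys of backing page */

		uintptr_t old = page_phys;		/* low bits lost */
		uintptr_t fixed = page_phys +
			(addr & (PAGE_SIZE - 1));	/* + offset_in_page() */

		printf("old=%#lx fixed=%#lx\n", (unsigned long)old,
		       (unsigned long)fixed);
		return 0;
	}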
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -595,6 +595,7 @@ static enum {
	PARTIAL_AC,
	PARTIAL_L3,
	EARLY,
+	LATE,
	FULL
 } g_cpucache_up;
 
@@ -671,7 +672,7 @@ static void init_node_lock_keys(int q)
 {
	struct cache_sizes *s = malloc_sizes;
 
-	if (g_cpucache_up != FULL)
+	if (g_cpucache_up < LATE)
		return;
 
	for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
@@ -1666,6 +1667,8 @@ void __init kmem_cache_init_late(void)
 {
	struct kmem_cache *cachep;
 
+	g_cpucache_up = LATE;
+
	/* Annotate slab for lockdep -- annotate the malloc caches */
	init_lock_keys();
 
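
The new LATE state exists so that init_lock_keys(), called from kmem_cache_init_late(), actually does something: the old g_cpucache_up != FULL test made it a no-op there, because FULL is only reached later in boot. Since the enumerators are declared in boot order, the relaxed < LATE comparison reads as "the malloc caches are not ready yet". A toy model of the gating in plain C:

	#include <stdio.h>

	enum cache_up { NONE, PARTIAL_AC, PARTIAL_L3, EARLY, LATE, FULL };

	static enum cache_up g_cpucache_up = EARLY;

	static void init_node_lock_keys(void)
	{
		if (g_cpucache_up < LATE) {	/* was: != FULL, a no-op here */
			puts("skipped: caches not ready");
			return;
		}
		puts("lockdep keys annotated");
	}

	int main(void)
	{
		init_node_lock_keys();	/* skipped while EARLY */
		g_cpucache_up = LATE;	/* as kmem_cache_init_late() now does */
		init_node_lock_keys();	/* runs */
		return 0;
	}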
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1862,7 +1862,7 @@ static void unfreeze_partials(struct kmem_cache *s)
 {
	struct kmem_cache_node *n = NULL;
	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
-	struct page *page;
+	struct page *page, *discard_page = NULL;
 
	while ((page = c->partial)) {
		enum slab_modes { M_PARTIAL, M_FREE };
@@ -1904,7 +1904,8 @@ static void unfreeze_partials(struct kmem_cache *s)
				if (l == M_PARTIAL)
					remove_partial(n, page);
				else
-					add_partial(n, page, 1);
+					add_partial(n, page,
+						DEACTIVATE_TO_TAIL);
 
				l = m;
			}
@@ -1915,14 +1916,22 @@ static void unfreeze_partials(struct kmem_cache *s)
				"unfreezing slab"));
 
		if (m == M_FREE) {
-			stat(s, DEACTIVATE_EMPTY);
-			discard_slab(s, page);
-			stat(s, FREE_SLAB);
+			page->next = discard_page;
+			discard_page = page;
		}
	}
 
	if (n)
		spin_unlock(&n->list_lock);
+
+	while (discard_page) {
+		page = discard_page;
+		discard_page = discard_page->next;
+
+		stat(s, DEACTIVATE_EMPTY);
+		discard_slab(s, page);
+		stat(s, FREE_SLAB);
+	}
 }
 
 /*
@@ -1969,7 +1978,7 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
		page->pobjects = pobjects;
		page->next = oldpage;
 
-	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
+	} while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
	stat(s, CPU_PARTIAL_FREE);
	return pobjects;
 }
@@ -4435,30 +4444,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 
		for_each_possible_cpu(cpu) {
			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+			int node = ACCESS_ONCE(c->node);
			struct page *page;
 
-			if (!c || c->node < 0)
+			if (node < 0)
				continue;
-
-			if (c->page) {
+			page = ACCESS_ONCE(c->page);
+			if (page) {
				if (flags & SO_TOTAL)
-					x = c->page->objects;
+					x = page->objects;
				else if (flags & SO_OBJECTS)
-					x = c->page->inuse;
+					x = page->inuse;
				else
					x = 1;
 
				total += x;
-				nodes[c->node] += x;
+				nodes[node] += x;
			}
			page = c->partial;
 
			if (page) {
				x = page->pobjects;
				total += x;
-				nodes[c->node] += x;
+				nodes[node] += x;
			}
-			per_cpu[c->node]++;
+			per_cpu[node]++;
		}
	}
 
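
The unfreeze_partials() rework is a classic defer-free pattern: empty slabs discovered while n->list_lock is held are chained onto a local discard_page list through page->next, and only passed to discard_slab() after the lock is dropped, keeping the critical section short. A userspace sketch of the pattern, with a pthread mutex standing in for the node's list_lock:

	#include <pthread.h>
	#include <stdlib.h>

	struct page_like {
		struct page_like *next;
	};

	static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

	void drain(struct page_like *partial)
	{
		struct page_like *discard = NULL, *p;

		pthread_mutex_lock(&list_lock);
		while ((p = partial) != NULL) {
			partial = p->next;
			p->next = discard;	/* page->next = discard_page */
			discard = p;
		}
		pthread_mutex_unlock(&list_lock);

		while ((p = discard) != NULL) {	/* free outside the lock */
			discard = p->next;
			free(p);
		}
	}

The put_cpu_partial() hunk separately swaps this_cpu_cmpxchg() for irqsafe_cpu_cmpxchg(), presumably because the partial-list update can race with interrupt context on architectures where the former is not irq-safe.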
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 3231bf332878..27be2f0d4cb7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1290,7 +1290,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
		unsigned long align, unsigned long flags, unsigned long start,
		unsigned long end, int node, gfp_t gfp_mask, void *caller)
 {
-	static struct vmap_area *va;
+	struct vmap_area *va;
	struct vm_struct *area;
 
	BUG_ON(in_interrupt());
@@ -1633,6 +1633,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
		goto fail;
 
	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
+	if (!addr)
+		return NULL;
 
	/*
	 * In this function, newly allocated vm_struct is not added
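
Both vmalloc.c changes are small but real: the stray static on va turned a per-call local into shared global state, racing between concurrent __get_vm_area_node() callers, and __vmalloc_node_range() now bails out as soon as __vmalloc_area_node() fails rather than continuing to operate on an area whose pages could not be allocated. The early-return shape, in miniature:

	addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller);
	if (!addr)
		return NULL;	/* allocation failed; do not touch area further */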
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a1893c050795..f54a05b7a61d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -183,7 +183,7 @@ static unsigned long zone_nr_lru_pages(struct zone *zone,
  */
 void register_shrinker(struct shrinker *shrinker)
 {
-	shrinker->nr = 0;
+	atomic_long_set(&shrinker->nr_in_batch, 0);
	down_write(&shrinker_rwsem);
	list_add_tail(&shrinker->list, &shrinker_list);
	up_write(&shrinker_rwsem);
@@ -247,25 +247,26 @@ unsigned long shrink_slab(struct shrink_control *shrink,
 
	list_for_each_entry(shrinker, &shrinker_list, list) {
		unsigned long long delta;
-		unsigned long total_scan;
-		unsigned long max_pass;
+		long total_scan;
+		long max_pass;
		int shrink_ret = 0;
		long nr;
		long new_nr;
		long batch_size = shrinker->batch ? shrinker->batch
						  : SHRINK_BATCH;
 
+		max_pass = do_shrinker_shrink(shrinker, shrink, 0);
+		if (max_pass <= 0)
+			continue;
+
		/*
		 * copy the current shrinker scan count into a local variable
		 * and zero it so that other concurrent shrinker invocations
		 * don't also do this scanning work.
		 */
-		do {
-			nr = shrinker->nr;
-		} while (cmpxchg(&shrinker->nr, nr, 0) != nr);
+		nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
 
		total_scan = nr;
-		max_pass = do_shrinker_shrink(shrinker, shrink, 0);
		delta = (4 * nr_pages_scanned) / shrinker->seeks;
		delta *= max_pass;
		do_div(delta, lru_pages + 1);
@@ -325,12 +326,11 @@ unsigned long shrink_slab(struct shrink_control *shrink,
		 * manner that handles concurrent updates. If we exhausted the
		 * scan, there is no need to do an update.
		 */
-		do {
-			nr = shrinker->nr;
-			new_nr = total_scan + nr;
-			if (total_scan <= 0)
-				break;
-		} while (cmpxchg(&shrinker->nr, nr, new_nr) != nr);
+		if (total_scan > 0)
+			new_nr = atomic_long_add_return(total_scan,
+					&shrinker->nr_in_batch);
+		else
+			new_nr = atomic_long_read(&shrinker->nr_in_batch);
 
		trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
	}
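
The vmscan.c conversion replaces the hand-rolled cmpxchg() retry loops on shrinker->nr with an atomic_long_t named nr_in_batch: claiming the whole deferred count becomes a single exchange with zero, and handing back the unscanned remainder becomes an add_return. It also queries do_shrinker_shrink() up front and skips shrinkers that report nothing (or an error) to scan. The equivalent shape in portable C11 atomics:

	#include <stdatomic.h>
	#include <stdio.h>

	int main(void)
	{
		atomic_long nr_in_batch = 42;	/* deferred scan count */

		/* claim the whole batch: replaces the cmpxchg retry loop */
		long nr = atomic_exchange(&nr_in_batch, 0);
		long total_scan = nr - 10;	/* pretend we scanned 10 objects */
		long new_nr;

		if (total_scan > 0)		/* hand leftovers back atomically */
			new_nr = atomic_fetch_add(&nr_in_batch, total_scan)
				 + total_scan;	/* fetch_add returns the old value */
		else
			new_nr = atomic_load(&nr_in_batch);

		printf("claimed=%ld new_nr=%ld\n", nr, new_nr);
		return 0;
	}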