author		Russell King <rmk+kernel@arm.linux.org.uk>	2011-12-05 18:20:17 -0500
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2011-12-05 18:20:17 -0500
commit		742eaa6a6e356a16788ce6530271de89bc4f8fb5 (patch)
tree		12fc040daab06ac796c61c1d92bfad9bb054d1c1 /mm
parent		ba8bb18a03f8c7508565c385576a5431a4ad804a (diff)
parent		ae72fd588a2b302222769b44775912b83f0785eb (diff)
Merge branch 'for-rmk' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into devel-stable
Conflicts:
	arch/arm/common/gic.c
	arch/arm/plat-omap/include/plat/common.h
Diffstat (limited to 'mm')
-rw-r--r--	mm/backing-dev.c	|  8
-rw-r--r--	mm/hugetlb.c		|  2
-rw-r--r--	mm/nommu.c		|  2
-rw-r--r--	mm/oom_kill.c		|  5
-rw-r--r--	mm/page-writeback.c	| 23
-rw-r--r--	mm/percpu-vm.c		| 17
-rw-r--r--	mm/percpu.c		| 62
-rw-r--r--	mm/slub.c		| 42
-rw-r--r--	mm/vmalloc.c		| 27
9 files changed, 110 insertions(+), 78 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index a0860640378d..71034f41a2ba 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -724,6 +724,14 @@ void bdi_destroy(struct backing_dev_info *bdi)
 
 	bdi_unregister(bdi);
 
+	/*
+	 * If bdi_unregister() had already been called earlier, the
+	 * wakeup_timer could still be armed because bdi_prune_sb()
+	 * can race with the bdi_wakeup_thread_delayed() calls from
+	 * __mark_inode_dirty().
+	 */
+	del_timer_sync(&bdi->wb.wakeup_timer);
+
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);
 
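
The added del_timer_sync() call follows the usual teardown rule for an object with an embedded timer: stop the timer and wait out any handler that is still running before freeing the state the handler touches. A minimal sketch of that pattern, using a hypothetical struct foo rather than the bdi code itself:

	#include <linux/timer.h>
	#include <linux/slab.h>

	struct foo {
		struct timer_list timer;
		void *stats;		/* state the timer handler may still touch */
	};

	static void foo_destroy(struct foo *f)
	{
		/*
		 * Stop the timer and wait for any handler that is already
		 * running; only then is it safe to free what it uses.
		 */
		del_timer_sync(&f->timer);

		kfree(f->stats);
		kfree(f);
	}
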
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index dae27ba3be2c..bb28a5f9db8d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2422,6 +2422,8 @@ retry_avoidcopy:
 	 * anon_vma prepared.
 	 */
 	if (unlikely(anon_vma_prepare(vma))) {
+		page_cache_release(new_page);
+		page_cache_release(old_page);
 		/* Caller expects lock to be held */
 		spin_lock(&mm->page_table_lock);
 		return VM_FAULT_OOM;
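
The two page_cache_release() calls drop the references held on new_page and old_page so the early VM_FAULT_OOM return no longer leaks them. Reduced to its core, this is the standard undo-before-early-return error path; a sketch with hypothetical helpers, not the hugetlb code:

	static int do_copy(struct page *old_page)
	{
		struct page *new_page;

		get_page(old_page);			/* reference that must be paired */
		new_page = alloc_page(GFP_KERNEL);
		if (!new_page) {
			put_page(old_page);		/* undo before bailing out */
			return -ENOMEM;
		}
		if (prepare_mapping(new_page)) {	/* hypothetical step that can fail */
			put_page(new_page);		/* release everything taken so far */
			put_page(old_page);
			return -ENOMEM;
		}
		/* ... */
		return 0;
	}
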
diff --git a/mm/nommu.c b/mm/nommu.c
index 73419c55eda6..b982290fd962 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -454,7 +454,7 @@ void __attribute__((weak)) vmalloc_sync_all(void)
  * between processes, it syncs the pagetable across all
  * processes.
  */
-struct vm_struct *alloc_vm_area(size_t size)
+struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
 {
 	BUG();
 	return NULL;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 471dedb463ab..76f2c5ae908e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -185,6 +185,11 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
 	if (!p)
 		return 0;
 
+	if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
+		task_unlock(p);
+		return 0;
+	}
+
 	/*
 	 * The memory controller may have a limit of 0 bytes, so avoid a divide
 	 * by zero, if necessary.
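
The new check short-circuits scoring for tasks whose oom_score_adj is pinned at OOM_SCORE_ADJ_MIN, i.e. tasks userspace has marked as never to be killed, and it drops the task lock on that path just like every other exit. A reduced sketch of that early-exit shape (hypothetical scoring helper, not the kernel function):

	static unsigned int score_task(struct task_struct *p)
	{
		unsigned int points;

		task_lock(p);
		if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
			task_unlock(p);		/* every return path must drop the lock */
			return 0;
		}
		points = compute_points(p);	/* hypothetical scoring helper */
		task_unlock(p);
		return points;
	}
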
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index a3278f005230..71252486bc6f 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -128,7 +128,6 @@ unsigned long global_dirty_limit;
  *
  */
 static struct prop_descriptor vm_completions;
-static struct prop_descriptor vm_dirties;
 
 /*
  * couple the period to the dirty_ratio:
@@ -154,7 +153,6 @@ static void update_completion_period(void)
 {
 	int shift = calc_period_shift();
 	prop_change_shift(&vm_completions, shift);
-	prop_change_shift(&vm_dirties, shift);
 
 	writeback_set_ratelimit();
 }
@@ -235,11 +233,6 @@ void bdi_writeout_inc(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL_GPL(bdi_writeout_inc);
 
-void task_dirty_inc(struct task_struct *tsk)
-{
-	prop_inc_single(&vm_dirties, &tsk->dirties);
-}
-
 /*
  * Obtain an accurate fraction of the BDI's portion.
  */
@@ -1133,17 +1126,17 @@ pause:
 					  pages_dirtied,
 					  pause,
 					  start_time);
-		__set_current_state(TASK_UNINTERRUPTIBLE);
+		__set_current_state(TASK_KILLABLE);
 		io_schedule_timeout(pause);
 
-		dirty_thresh = hard_dirty_limit(dirty_thresh);
 		/*
-		 * max-pause area. If dirty exceeded but still within this
-		 * area, no need to sleep for more than 200ms: (a) 8 pages per
-		 * 200ms is typically more than enough to curb heavy dirtiers;
-		 * (b) the pause time limit makes the dirtiers more responsive.
+		 * This is typically equal to (nr_dirty < dirty_thresh) and can
+		 * also keep "1000+ dd on a slow USB stick" under control.
 		 */
-		if (nr_dirty < dirty_thresh)
+		if (task_ratelimit)
+			break;
+
+		if (fatal_signal_pending(current))
 			break;
 	}
 
@@ -1395,7 +1388,6 @@ void __init page_writeback_init(void)
 
 	shift = calc_period_shift();
 	prop_descriptor_init(&vm_completions, shift);
-	prop_descriptor_init(&vm_dirties, shift);
 }
 
 /**
@@ -1724,7 +1716,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
 		__inc_zone_page_state(page, NR_DIRTIED);
 		__inc_bdi_stat(mapping->backing_dev_info, BDI_RECLAIMABLE);
 		__inc_bdi_stat(mapping->backing_dev_info, BDI_DIRTIED);
-		task_dirty_inc(current);
 		task_io_account_write(PAGE_CACHE_SIZE);
 	}
 }
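
The balance_dirty_pages() changes make the throttling sleep killable and add an explicit bail-out on fatal signals, so a task stuck behind a slow device can still be killed; the vm_dirties/task_dirty_inc() removals drop per-task dirty accounting that no longer has a consumer. The shape of the new pause loop, heavily reduced (the helpers deciding how long to pause and when pressure has eased are hypothetical):

	for (;;) {
		long pause = compute_pause();		/* hypothetical: throttle duration */

		__set_current_state(TASK_KILLABLE);	/* was TASK_UNINTERRUPTIBLE */
		io_schedule_timeout(pause);

		if (dirty_pressure_relieved())		/* hypothetical exit condition */
			break;
		if (fatal_signal_pending(current))	/* let a fatal signal get through */
			break;
	}
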
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index ea534960a04b..12a48a88c0d8 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -50,14 +50,13 @@ static struct page **pcpu_get_pages_and_bitmap(struct pcpu_chunk *chunk,
 
 	if (!pages || !bitmap) {
 		if (may_alloc && !pages)
-			pages = pcpu_mem_alloc(pages_size);
+			pages = pcpu_mem_zalloc(pages_size);
 		if (may_alloc && !bitmap)
-			bitmap = pcpu_mem_alloc(bitmap_size);
+			bitmap = pcpu_mem_zalloc(bitmap_size);
 		if (!pages || !bitmap)
 			return NULL;
 	}
 
-	memset(pages, 0, pages_size);
 	bitmap_copy(bitmap, chunk->populated, pcpu_unit_pages);
 
 	*bitmapp = bitmap;
@@ -143,8 +142,8 @@ static void pcpu_pre_unmap_flush(struct pcpu_chunk *chunk,
 				 int page_start, int page_end)
 {
 	flush_cache_vunmap(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 static void __pcpu_unmap_pages(unsigned long addr, int nr_pages)
@@ -206,8 +205,8 @@ static void pcpu_post_unmap_tlb_flush(struct pcpu_chunk *chunk,
 				      int page_start, int page_end)
 {
 	flush_tlb_kernel_range(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 static int __pcpu_map_pages(unsigned long addr, struct page **pages,
@@ -284,8 +283,8 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
 				int page_start, int page_end)
 {
 	flush_cache_vmap(
-		pcpu_chunk_addr(chunk, pcpu_first_unit_cpu, page_start),
-		pcpu_chunk_addr(chunk, pcpu_last_unit_cpu, page_end));
+		pcpu_chunk_addr(chunk, pcpu_low_unit_cpu, page_start),
+		pcpu_chunk_addr(chunk, pcpu_high_unit_cpu, page_end));
 }
 
 /**
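
The memset() becomes redundant because the allocator, renamed from pcpu_mem_alloc() to pcpu_mem_zalloc(), now guarantees zeroed memory on both of its paths. A minimal sketch of such a size-dependent zeroing allocator (not the kernel's exact implementation):

	static void *mem_zalloc(size_t size)
	{
		if (size <= PAGE_SIZE)
			return kzalloc(size, GFP_KERNEL);	/* small: slab, zeroed */
		return vzalloc(size);				/* large: vmalloc, zeroed */
	}
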
diff --git a/mm/percpu.c b/mm/percpu.c
index bf80e55dbed7..3bb810a72006 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -116,9 +116,9 @@ static int pcpu_atom_size __read_mostly;
 static int pcpu_nr_slots __read_mostly;
 static size_t pcpu_chunk_struct_size __read_mostly;
 
-/* cpus with the lowest and highest unit numbers */
-static unsigned int pcpu_first_unit_cpu __read_mostly;
-static unsigned int pcpu_last_unit_cpu __read_mostly;
+/* cpus with the lowest and highest unit addresses */
+static unsigned int pcpu_low_unit_cpu __read_mostly;
+static unsigned int pcpu_high_unit_cpu __read_mostly;
 
 /* the address of the first chunk which starts with the kernel static area */
 void *pcpu_base_addr __read_mostly;
@@ -273,11 +273,11 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
 	     (rs) = (re) + 1, pcpu_next_pop((chunk), &(rs), &(re), (end)))
 
 /**
- * pcpu_mem_alloc - allocate memory
+ * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
  *
  * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vmalloc() is used.  The returned
+ * kzalloc() is used; otherwise, vzalloc() is used.  The returned
  * memory is always zeroed.
  *
  * CONTEXT:
@@ -286,7 +286,7 @@ static void __maybe_unused pcpu_next_pop(struct pcpu_chunk *chunk,
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_alloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size)
 {
 	if (WARN_ON_ONCE(!slab_is_available()))
 		return NULL;
@@ -302,7 +302,7 @@ static void *pcpu_mem_alloc(size_t size)
  * @ptr: memory to free
  * @size: size of the area
 *
- * Free @ptr.  @ptr should have been allocated using pcpu_mem_alloc().
+ * Free @ptr.  @ptr should have been allocated using pcpu_mem_zalloc().
  */
 static void pcpu_mem_free(void *ptr, size_t size)
 {
@@ -384,7 +384,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
 	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
 	unsigned long flags;
 
-	new = pcpu_mem_alloc(new_size);
+	new = pcpu_mem_zalloc(new_size);
 	if (!new)
 		return -ENOMEM;
 
@@ -604,11 +604,12 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 {
 	struct pcpu_chunk *chunk;
 
-	chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
+	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
 	if (!chunk)
 		return NULL;
 
-	chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0]));
+	chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC *
+						sizeof(chunk->map[0]));
 	if (!chunk->map) {
 		kfree(chunk);
 		return NULL;
@@ -977,6 +978,17 @@ bool is_kernel_percpu_address(unsigned long addr)
 * address.  The caller is responsible for ensuring @addr stays valid
 * until this function finishes.
 *
+ * percpu allocator has special setup for the first chunk, which currently
+ * supports either embedding in linear address space or vmalloc mapping,
+ * and, from the second one, the backing allocator (currently either vm or
+ * km) provides translation.
+ *
+ * The addr can be tranlated simply without checking if it falls into the
+ * first chunk. But the current code reflects better how percpu allocator
+ * actually works, and the verification can discover both bugs in percpu
+ * allocator itself and per_cpu_ptr_to_phys() callers. So we keep current
+ * code.
+ *
 * RETURNS:
 * The physical address for @addr.
 */
@@ -984,19 +996,19 @@ phys_addr_t per_cpu_ptr_to_phys(void *addr)
 {
 	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
 	bool in_first_chunk = false;
-	unsigned long first_start, first_end;
+	unsigned long first_low, first_high;
 	unsigned int cpu;
 
 	/*
-	 * The following test on first_start/end isn't strictly
+	 * The following test on unit_low/high isn't strictly
 	 * necessary but will speed up lookups of addresses which
 	 * aren't in the first chunk.
 	 */
-	first_start = pcpu_chunk_addr(pcpu_first_chunk, pcpu_first_unit_cpu, 0);
-	first_end = pcpu_chunk_addr(pcpu_first_chunk, pcpu_last_unit_cpu,
-				    pcpu_unit_pages);
-	if ((unsigned long)addr >= first_start &&
-	    (unsigned long)addr < first_end) {
+	first_low = pcpu_chunk_addr(pcpu_first_chunk, pcpu_low_unit_cpu, 0);
+	first_high = pcpu_chunk_addr(pcpu_first_chunk, pcpu_high_unit_cpu,
+				     pcpu_unit_pages);
+	if ((unsigned long)addr >= first_low &&
+	    (unsigned long)addr < first_high) {
 		for_each_possible_cpu(cpu) {
 			void *start = per_cpu_ptr(base, cpu);
 
@@ -1233,7 +1245,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 
 	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		unit_map[cpu] = UINT_MAX;
-	pcpu_first_unit_cpu = NR_CPUS;
+
+	pcpu_low_unit_cpu = NR_CPUS;
+	pcpu_high_unit_cpu = NR_CPUS;
 
 	for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
 		const struct pcpu_group_info *gi = &ai->groups[group];
@@ -1253,9 +1267,13 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 			unit_map[cpu] = unit + i;
 			unit_off[cpu] = gi->base_offset + i * ai->unit_size;
 
-			if (pcpu_first_unit_cpu == NR_CPUS)
-				pcpu_first_unit_cpu = cpu;
-			pcpu_last_unit_cpu = cpu;
+			/* determine low/high unit_cpu */
+			if (pcpu_low_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] < unit_off[pcpu_low_unit_cpu])
+				pcpu_low_unit_cpu = cpu;
+			if (pcpu_high_unit_cpu == NR_CPUS ||
+			    unit_off[cpu] > unit_off[pcpu_high_unit_cpu])
+				pcpu_high_unit_cpu = cpu;
 		}
 	}
 	pcpu_nr_units = unit;
@@ -1889,7 +1907,7 @@ void __init percpu_init_late(void)
 
 		BUILD_BUG_ON(size > PAGE_SIZE);
 
-		map = pcpu_mem_alloc(size);
+		map = pcpu_mem_zalloc(size);
 		BUG_ON(!map);
 
 		spin_lock_irqsave(&pcpu_lock, flags);
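
per_cpu_ptr_to_phys() translates the address of one CPU's copy of a percpu variable into a physical address; the comparison against the lowest and highest unit addresses is only a fast filter before the per-CPU scan, which is why tracking pcpu_low_unit_cpu/pcpu_high_unit_cpu by unit offset rather than by discovery order matters. A hedged usage sketch (the percpu variable is illustrative):

	struct my_stats {				/* illustrative percpu payload */
		u64 hits;
	};
	static DEFINE_PER_CPU(struct my_stats, my_stats);

	static phys_addr_t my_stats_phys(unsigned int cpu)
	{
		/* per_cpu_ptr() picks that CPU's copy; translate it to a physical address */
		return per_cpu_ptr_to_phys(per_cpu_ptr(&my_stats, cpu));
	}
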
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1862,7 +1862,7 @@ static void unfreeze_partials(struct kmem_cache *s)
 {
 	struct kmem_cache_node *n = NULL;
 	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
-	struct page *page;
+	struct page *page, *discard_page = NULL;
 
 	while ((page = c->partial)) {
 		enum slab_modes { M_PARTIAL, M_FREE };
@@ -1904,7 +1904,8 @@ static void unfreeze_partials(struct kmem_cache *s)
 				if (l == M_PARTIAL)
 					remove_partial(n, page);
 				else
-					add_partial(n, page, 1);
+					add_partial(n, page,
+						DEACTIVATE_TO_TAIL);
 
 				l = m;
 			}
@@ -1915,14 +1916,22 @@ static void unfreeze_partials(struct kmem_cache *s)
 				"unfreezing slab"));
 
 		if (m == M_FREE) {
-			stat(s, DEACTIVATE_EMPTY);
-			discard_slab(s, page);
-			stat(s, FREE_SLAB);
+			page->next = discard_page;
+			discard_page = page;
 		}
 	}
 
 	if (n)
 		spin_unlock(&n->list_lock);
+
+	while (discard_page) {
+		page = discard_page;
+		discard_page = discard_page->next;
+
+		stat(s, DEACTIVATE_EMPTY);
+		discard_slab(s, page);
+		stat(s, FREE_SLAB);
+	}
 }
 
 /*
@@ -1969,7 +1978,7 @@ int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 		page->pobjects = pobjects;
 		page->next = oldpage;
 
-	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
+	} while (irqsafe_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
 	stat(s, CPU_PARTIAL_FREE);
 	return pobjects;
 }
@@ -4435,30 +4444,31 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 
 		for_each_possible_cpu(cpu) {
 			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+			int node = ACCESS_ONCE(c->node);
 			struct page *page;
 
-			if (!c || c->node < 0)
+			if (node < 0)
 				continue;
-
-			if (c->page) {
+			page = ACCESS_ONCE(c->page);
+			if (page) {
 				if (flags & SO_TOTAL)
-					x = c->page->objects;
+					x = page->objects;
 				else if (flags & SO_OBJECTS)
-					x = c->page->inuse;
+					x = page->inuse;
 				else
 					x = 1;
 
 				total += x;
-				nodes[c->node] += x;
+				nodes[node] += x;
 			}
 			page = c->partial;
 
 			if (page) {
 				x = page->pobjects;
 				total += x;
-				nodes[c->node] += x;
+				nodes[node] += x;
 			}
-			per_cpu[c->node]++;
+			per_cpu[node]++;
 		}
 	}
 
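
The unfreeze_partials() change gathers empty slabs on a local singly-linked list while the node's list_lock is held and calls discard_slab() only after the lock is dropped, keeping the expensive frees out of the critical section. The pattern in isolation, with hypothetical types standing in for struct page and the slab structures:

	struct item {
		struct item *next;
		/* ... payload ... */
	};

	struct pool {
		spinlock_t lock;
		/* ... */
	};

	static void drain_empty_items(struct pool *pool)
	{
		struct item *discard_list = NULL, *it;

		spin_lock(&pool->lock);
		while ((it = pool_pop_candidate(pool))) {	/* hypothetical producer */
			if (item_is_empty(it)) {		/* hypothetical test */
				it->next = discard_list;	/* chain locally under the lock */
				discard_list = it;
			} else {
				pool_keep(pool, it);		/* hypothetical: put it back */
			}
		}
		spin_unlock(&pool->lock);

		while (discard_list) {				/* free outside the lock */
			it = discard_list;
			discard_list = discard_list->next;
			expensive_free(it);			/* hypothetical, like discard_slab() */
		}
	}
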
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b669aa6f6caf..3231bf332878 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2141,23 +2141,30 @@ void __attribute__((weak)) vmalloc_sync_all(void)
 
 static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
 {
-	/* apply_to_page_range() does all the hard work. */
+	pte_t ***p = data;
+
+	if (p) {
+		*(*p) = pte;
+		(*p)++;
+	}
 	return 0;
 }
 
 /**
  * alloc_vm_area - allocate a range of kernel address space
  * @size: size of the area
+ * @ptes: returns the PTEs for the address space
 *
  * Returns:	NULL on failure, vm_struct on success
 *
  * This function reserves a range of kernel address space, and
  * allocates pagetables to map that range.  No actual mappings
- * are created.  If the kernel address space is not shared
- * between processes, it syncs the pagetable across all
- * processes.
+ * are created.
+ *
+ * If @ptes is non-NULL, pointers to the PTEs (in init_mm)
+ * allocated for the VM area are returned.
 */
-struct vm_struct *alloc_vm_area(size_t size)
+struct vm_struct *alloc_vm_area(size_t size, pte_t **ptes)
 {
 	struct vm_struct *area;
 
@@ -2171,19 +2178,11 @@ struct vm_struct *alloc_vm_area(size_t size)
 	 * of kernel virtual address space and mapped into init_mm.
 	 */
 	if (apply_to_page_range(&init_mm, (unsigned long)area->addr,
-				area->size, f, NULL)) {
+				size, f, ptes ? &ptes : NULL)) {
 		free_vm_area(area);
 		return NULL;
 	}
 
-	/*
-	 * If the allocated address space is passed to a hypercall
-	 * before being used then we cannot rely on a page fault to
-	 * trigger an update of the page tables.  So sync all the page
-	 * tables here.
-	 */
-	vmalloc_sync_all();
-
 	return area;
 }
 EXPORT_SYMBOL_GPL(alloc_vm_area);
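
With the new second parameter, a caller that needs the init_mm PTEs backing the reserved range, rather than a pre-synced mapping, collects them at allocation time and fills them in itself later (for example via a hypercall). A hedged usage sketch of the changed interface (the page count and names are illustrative):

	#define MY_NR_PAGES 4				/* illustrative */

	static struct vm_struct *my_area;
	static pte_t *my_ptes[MY_NR_PAGES];

	static int my_reserve_range(void)
	{
		my_area = alloc_vm_area(MY_NR_PAGES * PAGE_SIZE, my_ptes);
		if (!my_area)
			return -ENOMEM;

		/*
		 * my_ptes[] now points at the init_mm page table entries for
		 * my_area->addr; nothing is mapped yet, the caller populates
		 * the PTEs itself before touching the range.
		 */
		return 0;
	}
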