path: root/mm
author    James Morris <jmorris@namei.org>  2011-04-19 07:32:41 -0400
committer James Morris <jmorris@namei.org>  2011-04-19 07:32:41 -0400
commit    d4ab4e6a23f805abb8fc3cc34525eec3788aeca1
tree      eefd82c155bc27469a85667d759cd90facf4a6e3 /mm
parent    c0fa797ae6cd02ff87c0bfe0d509368a3b45640e
parent    96fd2d57b8252e16dfacf8941f7a74a6119197f5
Merge branch 'master'; commit 'v2.6.39-rc3' into next
Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c        2
-rw-r--r--  mm/huge_memory.c       49
-rw-r--r--  mm/hugetlb.c           10
-rw-r--r--  mm/hwpoison-inject.c    2
-rw-r--r--  mm/internal.h           2
-rw-r--r--  mm/kmemleak.c           6
-rw-r--r--  mm/ksm.c                2
-rw-r--r--  mm/memcontrol.c         8
-rw-r--r--  mm/memory-failure.c     6
-rw-r--r--  mm/memory.c            28
-rw-r--r--  mm/memory_hotplug.c     4
-rw-r--r--  mm/migrate.c            2
-rw-r--r--  mm/mlock.c             13
-rw-r--r--  mm/mmap.c              15
-rw-r--r--  mm/mremap.c            11
-rw-r--r--  mm/nobootmem.c          2
-rw-r--r--  mm/oom_kill.c          28
-rw-r--r--  mm/page_alloc.c         6
-rw-r--r--  mm/page_cgroup.c        2
-rw-r--r--  mm/percpu.c            10
-rw-r--r--  mm/shmem.c              6
-rw-r--r--  mm/slab.c               4
-rw-r--r--  mm/slub.c               8
-rw-r--r--  mm/sparse.c             2
-rw-r--r--  mm/util.c               2
-rw-r--r--  mm/vmscan.c            28
-rw-r--r--  mm/vmstat.c            18
27 files changed, 146 insertions, 130 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0d9a036ada66..befc87531e4f 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -787,7 +787,7 @@ EXPORT_SYMBOL(congestion_wait);
  * jiffies for either a BDI to exit congestion of the given @sync queue
  * or a write to complete.
  *
- * In the absense of zone congestion, cond_resched() is called to yield
+ * In the absence of zone congestion, cond_resched() is called to yield
  * the processor if necessary but otherwise does not sleep.
  *
  * The return value is 0 if the sleep is for the full timeout. Otherwise,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0a619e0e2e0b..470dcda10add 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -244,24 +244,28 @@ static ssize_t single_flag_show(struct kobject *kobj,
                 struct kobj_attribute *attr, char *buf,
                 enum transparent_hugepage_flag flag)
 {
-        if (test_bit(flag, &transparent_hugepage_flags))
-                return sprintf(buf, "[yes] no\n");
-        else
-                return sprintf(buf, "yes [no]\n");
+        return sprintf(buf, "%d\n",
+                       !!test_bit(flag, &transparent_hugepage_flags));
 }
+
 static ssize_t single_flag_store(struct kobject *kobj,
                  struct kobj_attribute *attr,
                  const char *buf, size_t count,
                  enum transparent_hugepage_flag flag)
 {
-        if (!memcmp("yes", buf,
-                    min(sizeof("yes")-1, count))) {
+        unsigned long value;
+        int ret;
+
+        ret = kstrtoul(buf, 10, &value);
+        if (ret < 0)
+                return ret;
+        if (value > 1)
+                return -EINVAL;
+
+        if (value)
                 set_bit(flag, &transparent_hugepage_flags);
-        } else if (!memcmp("no", buf,
-                   min(sizeof("no")-1, count))) {
+        else
                 clear_bit(flag, &transparent_hugepage_flags);
-        } else
-                return -EINVAL;
 
         return count;
 }
@@ -680,8 +684,11 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 return VM_FAULT_OOM;
         page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
                                   vma, haddr, numa_node_id(), 0);
-        if (unlikely(!page))
+        if (unlikely(!page)) {
+                count_vm_event(THP_FAULT_FALLBACK);
                 goto out;
+        }
+        count_vm_event(THP_FAULT_ALLOC);
         if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
                 put_page(page);
                 goto out;
@@ -909,11 +916,13 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 new_page = NULL;
 
         if (unlikely(!new_page)) {
+                count_vm_event(THP_FAULT_FALLBACK);
                 ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
                                                    pmd, orig_pmd, page, haddr);
                 put_page(page);
                 goto out;
         }
+        count_vm_event(THP_FAULT_ALLOC);
 
         if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
                 put_page(new_page);
@@ -1390,6 +1399,7 @@ int split_huge_page(struct page *page)
 
         BUG_ON(!PageSwapBacked(page));
         __split_huge_page(page, anon_vma);
+        count_vm_event(THP_SPLIT);
 
         BUG_ON(PageCompound(page));
 out_unlock:
@@ -1784,9 +1794,11 @@ static void collapse_huge_page(struct mm_struct *mm,
                                       node, __GFP_OTHER_NODE);
         if (unlikely(!new_page)) {
                 up_read(&mm->mmap_sem);
+                count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                 *hpage = ERR_PTR(-ENOMEM);
                 return;
         }
+        count_vm_event(THP_COLLAPSE_ALLOC);
         if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
                 up_read(&mm->mmap_sem);
                 put_page(new_page);
@@ -2151,8 +2163,11 @@ static void khugepaged_do_scan(struct page **hpage)
 #ifndef CONFIG_NUMA
                 if (!*hpage) {
                         *hpage = alloc_hugepage(khugepaged_defrag());
-                        if (unlikely(!*hpage))
+                        if (unlikely(!*hpage)) {
+                                count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                                 break;
+                        }
+                        count_vm_event(THP_COLLAPSE_ALLOC);
                 }
 #else
                 if (IS_ERR(*hpage))
@@ -2192,8 +2207,11 @@ static struct page *khugepaged_alloc_hugepage(void)
 
         do {
                 hpage = alloc_hugepage(khugepaged_defrag());
-                if (!hpage)
+                if (!hpage) {
+                        count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                         khugepaged_alloc_sleep();
+                } else
+                        count_vm_event(THP_COLLAPSE_ALLOC);
         } while (unlikely(!hpage) &&
                  likely(khugepaged_enabled()));
         return hpage;
@@ -2210,8 +2228,11 @@ static void khugepaged_loop(void)
         while (likely(khugepaged_enabled())) {
 #ifndef CONFIG_NUMA
                 hpage = khugepaged_alloc_hugepage();
-                if (unlikely(!hpage))
+                if (unlikely(!hpage)) {
+                        count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                         break;
+                }
+                count_vm_event(THP_COLLAPSE_ALLOC);
 #else
                 if (IS_ERR(hpage)) {
                         khugepaged_alloc_sleep();
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 06de5aa4d644..8ee3bd8ec5b5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -146,7 +146,7 @@ static long region_chg(struct list_head *head, long f, long t)
                 if (rg->from > t)
                         return chg;
 
-                /* We overlap with this area, if it extends futher than
+                /* We overlap with this area, if it extends further than
                  * us then we must extend ourselves. Account for its
                  * existing reservation. */
                 if (rg->to > t) {
@@ -842,7 +842,7 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
 }
 
 /*
- * Increase the hugetlb pool such that it can accomodate a reservation
+ * Increase the hugetlb pool such that it can accommodate a reservation
  * of size 'delta'.
  */
 static int gather_surplus_pages(struct hstate *h, int delta)
@@ -890,7 +890,7 @@ retry:
 
         /*
          * The surplus_list now contains _at_least_ the number of extra pages
-         * needed to accomodate the reservation. Add the appropriate number
+         * needed to accommodate the reservation. Add the appropriate number
          * of pages to the hugetlb pool and free the extras back to the buddy
          * allocator. Commit the entire reservation here to prevent another
          * process from stealing the pages as they are added to the pool but
@@ -2043,7 +2043,7 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
          * This new VMA should share its siblings reservation map if present.
          * The VMA will only ever have a valid reservation map pointer where
          * it is being copied for another still existing VMA. As that VMA
-         * has a reference to the reservation map it cannot dissappear until
+         * has a reference to the reservation map it cannot disappear until
          * after this open call completes. It is therefore safe to take a
          * new reference here without additional locking.
          */
@@ -2490,7 +2490,7 @@ static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
         /*
          * Currently, we are forced to kill the process in the event the
          * original mapper has unmapped pages from the child due to a failed
-         * COW. Warn that such a situation has occured as it may not be obvious
+         * COW. Warn that such a situation has occurred as it may not be obvious
          */
         if (is_vma_resv_set(vma, HPAGE_RESV_UNMAPPED)) {
                 printk(KERN_WARNING
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index 0948f1072d6b..c7fc7fd00e32 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -1,4 +1,4 @@
-/* Inject a hwpoison memory failure on a arbitary pfn */
+/* Inject a hwpoison memory failure on a arbitrary pfn */
 #include <linux/module.h>
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
diff --git a/mm/internal.h b/mm/internal.h
index 3438dd43a062..9d0ced8e505e 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -162,7 +162,7 @@ static inline struct page *mem_map_offset(struct page *base, int offset)
 }
 
 /*
- * Iterator over all subpages withing the maximally aligned gigantic
+ * Iterator over all subpages within the maximally aligned gigantic
  * page 'base'. Handle any discontiguity in the mem_map.
  */
 static inline struct page *mem_map_next(struct page *iter,
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 84225f3b7190..c1d5867543e4 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -265,7 +265,7 @@ static void kmemleak_disable(void);
 } while (0)
 
 /*
- * Macro invoked when a serious kmemleak condition occured and cannot be
+ * Macro invoked when a serious kmemleak condition occurred and cannot be
  * recovered from. Kmemleak will be disabled and further allocation/freeing
  * tracing no longer available.
  */
@@ -1006,7 +1006,7 @@ static bool update_checksum(struct kmemleak_object *object)
 
 /*
  * Memory scanning is a long process and it needs to be interruptable. This
- * function checks whether such interrupt condition occured.
+ * function checks whether such interrupt condition occurred.
  */
 static int scan_should_stop(void)
 {
@@ -1733,7 +1733,7 @@ static int __init kmemleak_late_init(void)
 
         if (atomic_read(&kmemleak_error)) {
                 /*
-                 * Some error occured and kmemleak was disabled. There is a
+                 * Some error occurred and kmemleak was disabled. There is a
                  * small chance that kmemleak_disable() was called immediately
                  * after setting kmemleak_initialized and we may end up with
                  * two clean-up threads but serialized by scan_mutex.
diff --git a/mm/ksm.c b/mm/ksm.c
index 1bbe785aa559..942dfc73a2ff 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -720,7 +720,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
         swapped = PageSwapCache(page);
         flush_cache_page(vma, addr, page_to_pfn(page));
         /*
-         * Ok this is tricky, when get_user_pages_fast() run it doesnt
+         * Ok this is tricky, when get_user_pages_fast() run it doesn't
          * take any lock, therefore the check that we are going to make
          * with the pagecount against the mapcount is racey and
          * O_DIRECT can happen right after the check.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1f0b460fe58c..010f9166fa6e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1466,7 +1466,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
                                 break;
                         }
                         /*
-                         * We want to do more targetted reclaim.
+                         * We want to do more targeted reclaim.
                          * excess >> 2 is not to excessive so as to
                          * reclaim too much, nor too less that we keep
                          * coming back to reclaim from this cgroup
@@ -2265,7 +2265,7 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
  * - compound_lock is held when nr_pages > 1
  *
  * This function doesn't do "charge" nor css_get to new cgroup. It should be
- * done by a caller(__mem_cgroup_try_charge would be usefull). If @uncharge is
+ * done by a caller(__mem_cgroup_try_charge would be useful). If @uncharge is
  * true, this function does "uncharge" from old cgroup, but it doesn't if
  * @uncharge is false, so a caller should do "uncharge".
  */
@@ -2318,7 +2318,7 @@ static int mem_cgroup_move_account(struct page *page,
          * We charges against "to" which may not have any tasks. Then, "to"
          * can be under rmdir(). But in current implementation, caller of
          * this function is just force_empty() and move charge, so it's
-         * garanteed that "to" is never removed. So, we don't check rmdir
+         * guaranteed that "to" is never removed. So, we don't check rmdir
          * status here.
          */
         move_unlock_page_cgroup(pc, &flags);
@@ -2648,7 +2648,7 @@ static void mem_cgroup_do_uncharge(struct mem_cgroup *mem,
         batch->memcg = mem;
         /*
          * do_batch > 0 when unmapping pages or inode invalidate/truncate.
-         * In those cases, all pages freed continously can be expected to be in
+         * In those cases, all pages freed continuously can be expected to be in
          * the same cgroup and we have chance to coalesce uncharges.
          * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
          * because we want to do uncharge as soon as possible.
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 37feb9fec228..2b9a5eef39e0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -208,7 +208,7 @@ static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
          * Don't use force here, it's convenient if the signal
          * can be temporarily blocked.
          * This could cause a loop when the user sets SIGBUS
-         * to SIG_IGN, but hopefully noone will do that?
+         * to SIG_IGN, but hopefully no one will do that?
          */
         ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
         if (ret < 0)
@@ -634,7 +634,7 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
          * when the page is reread or dropped. If an
          * application assumes it will always get error on
          * fsync, but does other operations on the fd before
-         * and the page is dropped inbetween then the error
+         * and the page is dropped between then the error
          * will not be properly reported.
          *
          * This can already happen even without hwpoisoned
@@ -728,7 +728,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
  * The table matches them in order and calls the right handler.
  *
  * This is quite tricky because we can access page at any time
- * in its live cycle, so all accesses have to be extremly careful.
+ * in its live cycle, so all accesses have to be extremely careful.
  *
  * This is not complete. More states could be added.
  * For any missing state don't attempt recovery.
diff --git a/mm/memory.c b/mm/memory.c
index 9da8cab1b1b0..ce22a250926f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1410,6 +1410,13 @@ no_page_table:
         return page;
 }
 
+static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
+{
+        return (vma->vm_flags & VM_GROWSDOWN) &&
+                (vma->vm_start == addr) &&
+                !vma_stack_continue(vma->vm_prev, addr);
+}
+
 /**
  * __get_user_pages() - pin user pages in memory
  * @tsk:	task_struct of target task
@@ -1488,7 +1495,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                 vma = find_extend_vma(mm, start);
                 if (!vma && in_gate_area(mm, start)) {
                         unsigned long pg = start & PAGE_MASK;
-                        struct vm_area_struct *gate_vma = get_gate_vma(mm);
                         pgd_t *pgd;
                         pud_t *pud;
                         pmd_t *pmd;
@@ -1513,10 +1519,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                 pte_unmap(pte);
                                 return i ? : -EFAULT;
                         }
+                        vma = get_gate_vma(mm);
                         if (pages) {
                                 struct page *page;
 
-                                page = vm_normal_page(gate_vma, start, *pte);
+                                page = vm_normal_page(vma, start, *pte);
                                 if (!page) {
                                         if (!(gup_flags & FOLL_DUMP) &&
                                             is_zero_pfn(pte_pfn(*pte)))
@@ -1530,12 +1537,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                                         get_page(page);
                                 }
                                 pte_unmap(pte);
-                                if (vmas)
-                                        vmas[i] = gate_vma;
-                                i++;
-                                start += PAGE_SIZE;
-                                nr_pages--;
-                                continue;
+                                goto next_page;
                         }
 
                 if (!vma ||
@@ -1549,6 +1551,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         continue;
                 }
 
+                /*
+                 * If we don't actually want the page itself,
+                 * and it's the stack guard page, just skip it.
+                 */
+                if (!pages && stack_guard_page(vma, start))
+                        goto next_page;
+
                 do {
                         struct page *page;
                         unsigned int foll_flags = gup_flags;
@@ -1631,6 +1640,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                         flush_anon_page(vma, page, start);
                         flush_dcache_page(page);
                 }
+next_page:
                 if (vmas)
                         vmas[i] = vma;
                 i++;
@@ -3678,7 +3688,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
          */
 #ifdef CONFIG_HAVE_IOREMAP_PROT
         vma = find_vma(mm, addr);
-        if (!vma)
+        if (!vma || vma->vm_start > addr)
                 break;
         if (vma->vm_ops && vma->vm_ops->access)
                 ret = vma->vm_ops->access(vma, addr, buf,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 321fc7455df7..9ca1d604f7cd 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -375,7 +375,7 @@ void online_page(struct page *page)
 #endif
 
 #ifdef CONFIG_FLATMEM
-        max_mapnr = max(page_to_pfn(page), max_mapnr);
+        max_mapnr = max(pfn, max_mapnr);
 #endif
 
         ClearPageReserved(page);
@@ -724,7 +724,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                                pfn);
                         dump_page(page);
 #endif
-                        /* Becasue we don't have big zone->lock. we should
+                        /* Because we don't have big zone->lock. we should
                            check this again here. */
                         if (page_count(page)) {
                                 not_managed++;
diff --git a/mm/migrate.c b/mm/migrate.c
index b0406d739ea7..34132f8e9109 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -375,7 +375,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
                  * redo the accounting that clear_page_dirty_for_io undid,
                  * but we can't use set_page_dirty because that function
                  * is actually a signal that all of the page has become dirty.
-                 * Wheras only part of our page may be dirty.
+                 * Whereas only part of our page may be dirty.
                  */
                 __set_page_dirty_nobuffers(newpage);
         }
diff --git a/mm/mlock.c b/mm/mlock.c
index 2689a08c79af..6b55e3efe0df 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -135,13 +135,6 @@ void munlock_vma_page(struct page *page)
         }
 }
 
-static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
-{
-        return (vma->vm_flags & VM_GROWSDOWN) &&
-                (vma->vm_start == addr) &&
-                !vma_stack_continue(vma->vm_prev, addr);
-}
-
 /**
  * __mlock_vma_pages_range() - mlock a range of pages in the vma.
  * @vma:   target vma
@@ -188,12 +181,6 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
         if (vma->vm_flags & VM_LOCKED)
                 gup_flags |= FOLL_MLOCK;
 
-        /* We don't try to access the guard page of a stack vma */
-        if (stack_guard_page(vma, start)) {
-                addr += PAGE_SIZE;
-                nr_pages--;
-        }
-
         return __get_user_pages(current, mm, addr, nr_pages, gup_flags,
                                 NULL, NULL, nonblocking);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 2ec8eb5a9cdd..e27e0cf0de03 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -259,7 +259,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
          * randomize_va_space to 2, which will still cause mm->start_brk
          * to be arbitrarily shifted
          */
-        if (mm->start_brk > PAGE_ALIGN(mm->end_data))
+        if (current->brk_randomized)
                 min_brk = mm->start_brk;
         else
                 min_brk = mm->end_data;
@@ -1814,11 +1814,14 @@ static int expand_downwards(struct vm_area_struct *vma,
                 size = vma->vm_end - address;
                 grow = (vma->vm_start - address) >> PAGE_SHIFT;
 
-                error = acct_stack_growth(vma, size, grow);
-                if (!error) {
-                        vma->vm_start = address;
-                        vma->vm_pgoff -= grow;
-                        perf_event_mmap(vma);
+                error = -ENOMEM;
+                if (grow <= vma->vm_pgoff) {
+                        error = acct_stack_growth(vma, size, grow);
+                        if (!error) {
+                                vma->vm_start = address;
+                                vma->vm_pgoff -= grow;
+                                perf_event_mmap(vma);
+                        }
                 }
         }
         vma_unlock_anon_vma(vma);
diff --git a/mm/mremap.c b/mm/mremap.c
index 1de98d492ddc..a7c1f9f9b941 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -277,9 +277,16 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
         if (old_len > vma->vm_end - addr)
                 goto Efault;
 
-        if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) {
-                if (new_len > old_len)
+        /* Need to be careful about a growing mapping */
+        if (new_len > old_len) {
+                unsigned long pgoff;
+
+                if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP))
                         goto Efault;
+                pgoff = (addr - vma->vm_start) >> PAGE_SHIFT;
+                pgoff += vma->vm_pgoff;
+                if (pgoff + (new_len >> PAGE_SHIFT) < pgoff)
+                        goto Einval;
         }
 
         if (vma->vm_flags & VM_LOCKED) {
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index e99f6cd1da1f..9109049f0bbc 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -150,7 +150,7 @@ unsigned long __init free_all_bootmem(void)
 {
         /*
          * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
-         *  because in some case like Node0 doesnt have RAM installed
+         *  because in some case like Node0 doesn't have RAM installed
          *  low ram will be on Node1
          * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
          *  will be used instead of only Node0 related
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6a819d1b2c7d..83fb72c108b7 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -84,24 +84,6 @@ static bool has_intersects_mems_allowed(struct task_struct *tsk,
 #endif /* CONFIG_NUMA */
 
 /*
- * If this is a system OOM (not a memcg OOM) and the task selected to be
- * killed is not already running at high (RT) priorities, speed up the
- * recovery by boosting the dying task to the lowest FIFO priority.
- * That helps with the recovery and avoids interfering with RT tasks.
- */
-static void boost_dying_task_prio(struct task_struct *p,
-                                  struct mem_cgroup *mem)
-{
-        struct sched_param param = { .sched_priority = 1 };
-
-        if (mem)
-                return;
-
-        if (!rt_task(p))
-                sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
-}
-
-/*
  * The process p may have detached its own ->mm while exiting or through
  * use_mm(), but one or more of its subthreads may still have a valid
  * pointer. Return p, or any of its subthreads with a valid ->mm, with
@@ -452,13 +434,6 @@ static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem)
         set_tsk_thread_flag(p, TIF_MEMDIE);
         force_sig(SIGKILL, p);
 
-        /*
-         * We give our sacrificial lamb high priority and access to
-         * all the memory it needs. That way it should be able to
-         * exit() and clear out its resources quickly...
-         */
-        boost_dying_task_prio(p, mem);
-
         return 0;
 }
 #undef K
@@ -482,7 +457,6 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
          */
         if (p->flags & PF_EXITING) {
                 set_tsk_thread_flag(p, TIF_MEMDIE);
-                boost_dying_task_prio(p, mem);
                 return 0;
         }
 
@@ -556,7 +530,6 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
          */
         if (fatal_signal_pending(current)) {
                 set_thread_flag(TIF_MEMDIE);
-                boost_dying_task_prio(current, NULL);
                 return;
         }
 
@@ -712,7 +685,6 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
          */
         if (fatal_signal_pending(current)) {
                 set_thread_flag(TIF_MEMDIE);
-                boost_dying_task_prio(current, NULL);
                 return;
         }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d6e7ba7373be..9f8a97b9a350 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -942,7 +942,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
                          * If breaking a large block of pages, move all free
                          * pages to the preferred allocation list. If falling
                          * back for a reclaimable kernel allocation, be more
-                         * agressive about taking ownership of free pages
+                         * aggressive about taking ownership of free pages
                          */
                         if (unlikely(current_order >= (pageblock_order >> 1)) ||
                                         start_migratetype == MIGRATE_RECLAIMABLE ||
@@ -3176,7 +3176,7 @@ static __init_refok int __build_all_zonelists(void *data)
  * Called with zonelists_mutex held always
  * unless system_state == SYSTEM_BOOTING.
  */
-void build_all_zonelists(void *data)
+void __ref build_all_zonelists(void *data)
 {
         set_zonelist_order();
 
@@ -3926,7 +3926,7 @@ static void __init find_usable_zone_for_movable(void)
 
 /*
  * The zone ranges provided by the architecture do not include ZONE_MOVABLE
- * because it is sized independant of architecture. Unlike the other zones,
+ * because it is sized independent of architecture. Unlike the other zones,
  * the starting point for ZONE_MOVABLE is not fixed. It may be different
  * in each node depending on the size of each node and how evenly kernelcore
  * is distributed. This helper function adjusts the zone ranges
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index a12cc3fa9859..99055010cece 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -377,7 +377,7 @@ not_enough_page:
  * @new: new id
  *
  * Returns old id at success, 0 at failure.
- * (There is no mem_cgroup useing 0 as its id)
+ * (There is no mem_cgroup using 0 as its id)
  */
 unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
                                    unsigned short old, unsigned short new)
diff --git a/mm/percpu.c b/mm/percpu.c
index 55d4d113fbd3..a160db39b810 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -342,7 +342,7 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
  * @chunk: chunk of interest
  *
  * Determine whether area map of @chunk needs to be extended to
- * accomodate a new allocation.
+ * accommodate a new allocation.
  *
  * CONTEXT:
  * pcpu_lock.
@@ -431,7 +431,7 @@ out_unlock:
  * depending on @head, is reduced by @tail bytes and @tail byte block
  * is inserted after the target block.
  *
- * @chunk->map must have enough free slots to accomodate the split.
+ * @chunk->map must have enough free slots to accommodate the split.
  *
  * CONTEXT:
  * pcpu_lock.
@@ -1435,7 +1435,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
         /*
          * Determine min_unit_size, alloc_size and max_upa such that
          * alloc_size is multiple of atom_size and is the smallest
-         * which can accomodate 4k aligned segments which are equal to
+         * which can accommodate 4k aligned segments which are equal to
          * or larger than min_unit_size.
          */
         min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
@@ -1550,7 +1550,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
  * @alloc_fn: function to allocate percpu page
- * @free_fn: funtion to free percpu page
+ * @free_fn: function to free percpu page
  *
  * This is a helper to ease setting up embedded first percpu chunk and
  * can be called where pcpu_setup_first_chunk() is expected.
@@ -1678,7 +1678,7 @@ out_free:
  * pcpu_page_first_chunk - map the first chunk using PAGE_SIZE pages
  * @reserved_size: the size of reserved percpu area in bytes
  * @alloc_fn: function to allocate percpu page, always called with PAGE_SIZE
- * @free_fn: funtion to free percpu page, always called with PAGE_SIZE
+ * @free_fn: function to free percpu page, always called with PAGE_SIZE
  * @populate_pte_fn: function to populate pte
  *
  * This is a helper to ease setting up page-remapped first percpu
diff --git a/mm/shmem.c b/mm/shmem.c
index 58da7c150ba6..8fa27e4e582a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -421,7 +421,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
          * a waste to allocate index if we cannot allocate data.
          */
         if (sbinfo->max_blocks) {
-                if (percpu_counter_compare(&sbinfo->used_blocks, (sbinfo->max_blocks - 1)) > 0)
+                if (percpu_counter_compare(&sbinfo->used_blocks,
+                                        sbinfo->max_blocks - 1) >= 0)
                         return ERR_PTR(-ENOSPC);
                 percpu_counter_inc(&sbinfo->used_blocks);
                 spin_lock(&inode->i_lock);
@@ -1397,7 +1398,8 @@ repeat:
                         shmem_swp_unmap(entry);
                         sbinfo = SHMEM_SB(inode->i_sb);
                         if (sbinfo->max_blocks) {
-                                if ((percpu_counter_compare(&sbinfo->used_blocks, sbinfo->max_blocks) > 0) ||
+                                if (percpu_counter_compare(&sbinfo->used_blocks,
+                                                        sbinfo->max_blocks) >= 0 ||
                                     shmem_acct_block(info->flags)) {
                                         spin_unlock(&info->lock);
                                         error = -ENOSPC;
diff --git a/mm/slab.c b/mm/slab.c
index 568803f121a8..46a9c163a92f 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -878,7 +878,7 @@ static struct array_cache *alloc_arraycache(int node, int entries,
         nc = kmalloc_node(memsize, gfp, node);
         /*
          * The array_cache structures contain pointers to free object.
-         * However, when such objects are allocated or transfered to another
+         * However, when such objects are allocated or transferred to another
          * cache the pointers are not cleared and they could be counted as
          * valid references during a kmemleak scan. Therefore, kmemleak must
          * not scan such objects.
@@ -2606,7 +2606,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  *
  * The cache must be empty before calling this function.
  *
- * The caller must guarantee that noone will allocate memory from the cache
+ * The caller must guarantee that no one will allocate memory from the cache
  * during the kmem_cache_destroy().
  */
 void kmem_cache_destroy(struct kmem_cache *cachep)
diff --git a/mm/slub.c b/mm/slub.c
index f881874843a5..94d2a33a866e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -64,7 +64,7 @@
  *   we must stay away from it for a while since we may cause a bouncing
  *   cacheline if we try to acquire the lock. So go onto the next slab.
  *   If all pages are busy then we may allocate a new slab instead of reusing
- *   a partial slab. A new slab has noone operating on it and thus there is
+ *   a partial slab. A new slab has no one operating on it and thus there is
  *   no danger of cacheline contention.
  *
  * Interrupts are disabled during allocation and deallocation in order to
@@ -1929,7 +1929,7 @@ redo:
         else {
 #ifdef CONFIG_CMPXCHG_LOCAL
                 /*
-                 * The cmpxchg will only match if there was no additonal
+                 * The cmpxchg will only match if there was no additional
                  * operation and if we are on the right processor.
                  *
                  * The cmpxchg does the following atomically (without lock semantics!)
@@ -3547,7 +3547,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 
         ret = slab_alloc(s, gfpflags, NUMA_NO_NODE, caller);
 
-        /* Honor the call site pointer we recieved. */
+        /* Honor the call site pointer we received. */
         trace_kmalloc(caller, ret, size, s->size, gfpflags);
 
         return ret;
@@ -3577,7 +3577,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 
         ret = slab_alloc(s, gfpflags, node, caller);
 
-        /* Honor the call site pointer we recieved. */
+        /* Honor the call site pointer we received. */
         trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
 
         return ret;
diff --git a/mm/sparse.c b/mm/sparse.c
index 93250207c5cf..aa64b12831a2 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -500,7 +500,7 @@ void __init sparse_init(void)
          * so alloc 2M (with 2M align) and 24 bytes in turn will
          * make next 2M slip to one more 2M later.
          * then in big system, the memory will have a lot of holes...
-         * here try to allocate 2M pages continously.
+         * here try to allocate 2M pages continuously.
          *
          * powerpc need to call sparse_init_one_section right after each
          * sparse_early_mem_map_alloc, so allocate usemap_map at first.
diff --git a/mm/util.c b/mm/util.c
index f126975ef23e..e7b103a6fd21 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -227,7 +227,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 /*
  * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
  * back to the regular GUP.
- * If the architecture not support this fucntion, simply return with no
+ * If the architecture not support this function, simply return with no
  * page pinned
  */
 int __attribute__((weak)) __get_user_pages_fast(unsigned long start,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f73b8657c2d0..f6b435c80079 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -41,6 +41,7 @@
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
+#include <linux/oom.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1065,7 +1066,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                  * surrounding the tag page. Only take those pages of
                  * the same active state as that tag page. We may safely
                  * round the target page pfn down to the requested order
-                 * as the mem_map is guarenteed valid out to MAX_ORDER,
+                 * as the mem_map is guaranteed valid out to MAX_ORDER,
                  * where that page is in a different zone we will detect
                  * it from its zone id and abort this block scan.
                  */
@@ -1988,17 +1989,12 @@ static bool zone_reclaimable(struct zone *zone)
         return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
 }
 
-/*
- * As hibernation is going on, kswapd is freezed so that it can't mark
- * the zone into all_unreclaimable. It can't handle OOM during hibernation.
- * So let's check zone's unreclaimable in direct reclaim as well as kswapd.
- */
+/* All zones in zonelist are unreclaimable? */
 static bool all_unreclaimable(struct zonelist *zonelist,
                 struct scan_control *sc)
 {
         struct zoneref *z;
         struct zone *zone;
-        bool all_unreclaimable = true;
 
         for_each_zone_zonelist_nodemask(zone, z, zonelist,
                         gfp_zone(sc->gfp_mask), sc->nodemask) {
@@ -2006,13 +2002,11 @@ static bool all_unreclaimable(struct zonelist *zonelist,
                         continue;
                 if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                         continue;
-                if (zone_reclaimable(zone)) {
-                        all_unreclaimable = false;
-                        break;
-                }
+                if (!zone->all_unreclaimable)
+                        return false;
         }
 
-        return all_unreclaimable;
+        return true;
 }
 
 /*
@@ -2108,6 +2102,14 @@ out:
         if (sc->nr_reclaimed)
                 return sc->nr_reclaimed;
 
+        /*
+         * As hibernation is going on, kswapd is freezed so that it can't mark
+         * the zone into all_unreclaimable. Thus bypassing all_unreclaimable
+         * check.
+         */
+        if (oom_killer_disabled)
+                return 0;
+
         /* top priority shrink_zones still had more to do? don't OOM, then */
         if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc))
                 return 1;
@@ -2224,7 +2226,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
  * o a 16M DMA zone that is balanced will not balance a zone on any
  *   reasonable sized machine
  * o On all other machines, the top zone must be at least a reasonable
- *   precentage of the middle zones. For example, on 32-bit x86, highmem
+ *   percentage of the middle zones. For example, on 32-bit x86, highmem
  *   would need to be at least 256M for it to be balance a whole node.
  *   Similarly, on x86-64 the Normal zone would need to be at least 1G
  *   to balance a node on its own. These seemed like reasonable ratios.
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 772b39b87d95..897ea9e88238 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -321,9 +321,12 @@ static inline void mod_state(struct zone *zone,
         /*
          * The fetching of the stat_threshold is racy. We may apply
          * a counter threshold to the wrong the cpu if we get
-         * rescheduled while executing here. However, the following
-         * will apply the threshold again and therefore bring the
-         * counter under the threshold.
+         * rescheduled while executing here. However, the next
+         * counter update will apply the threshold again and
+         * therefore bring the counter under the threshold again.
+         *
+         * Most of the time the thresholds are the same anyways
+         * for all cpus in a zone.
          */
         t = this_cpu_read(pcp->stat_threshold);
 
@@ -945,7 +948,16 @@ static const char * const vmstat_text[] = {
945 "unevictable_pgs_cleared", 948 "unevictable_pgs_cleared",
946 "unevictable_pgs_stranded", 949 "unevictable_pgs_stranded",
947 "unevictable_pgs_mlockfreed", 950 "unevictable_pgs_mlockfreed",
951
952#ifdef CONFIG_TRANSPARENT_HUGEPAGE
953 "thp_fault_alloc",
954 "thp_fault_fallback",
955 "thp_collapse_alloc",
956 "thp_collapse_alloc_failed",
957 "thp_split",
948#endif 958#endif
959
960#endif /* CONFIG_VM_EVENTS_COUNTERS */
949}; 961};
950 962
951static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, 963static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,