author		Takashi Iwai <tiwai@suse.de>	2011-01-24 12:41:10 -0500
committer	Takashi Iwai <tiwai@suse.de>	2011-01-24 12:41:10 -0500
commit		49c6ad430d74fb7995990be0f66165e4b94a6bc5 (patch)
tree		9a0b4d5158cea625efd1f4185cdea79fe9f10d85 /mm
parent		233d84c46c2253d13e10b42d88c14748fbb67a98 (diff)
parent		1bae4ce27c9c90344f23c65ea6966c50ffeae2f5 (diff)

Merge commit 'v2.6.38-rc2' into topic/misc

Diffstat (limited to 'mm')
-rw-r--r--	mm/compaction.c		11
-rw-r--r--	mm/huge_memory.c	5
-rw-r--r--	mm/memblock.c		8
-rw-r--r--	mm/memcontrol.c		190
-rw-r--r--	mm/truncate.c		11
-rw-r--r--	mm/vmscan.c		1

6 files changed, 134 insertions, 92 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index 6d592a021072..8be430b812de 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -406,6 +406,10 @@ static int compact_finished(struct zone *zone,
 	if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
 		return COMPACT_CONTINUE;
 
+	/*
+	 * order == -1 is expected when compacting via
+	 * /proc/sys/vm/compact_memory
+	 */
 	if (cc->order == -1)
 		return COMPACT_CONTINUE;
 
@@ -454,6 +458,13 @@ unsigned long compaction_suitable(struct zone *zone, int order)
 		return COMPACT_SKIPPED;
 
 	/*
+	 * order == -1 is expected when compacting via
+	 * /proc/sys/vm/compact_memory
+	 */
+	if (order == -1)
+		return COMPACT_CONTINUE;
+
+	/*
 	 * fragmentation index determines if allocation failures are due to
 	 * low memory or external fragmentation
 	 *
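Both compaction hunks special-case order == -1, which is not a real allocation order but the sentinel used when the whole system is compacted through the sysctl interface named in the new comments. A minimal userspace sketch of exercising that path (assumes root and a kernel built with CONFIG_COMPACTION):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Ask the kernel to compact all memory. Writing any value to this file
 * runs the compaction code with order == -1, the sentinel the two new
 * comments describe. Needs root. */
int main(void)
{
	int fd = open("/proc/sys/vm/compact_memory", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/sys/vm/compact_memory");
		return 1;
	}
	if (write(fd, "1\n", 2) != 2)
		perror("write");
	close(fd);
	return 0;
}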
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 004c9c2aac78..e187454d82f6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1203,6 +1203,8 @@ static void __split_huge_page_refcount(struct page *page)
 		BUG_ON(!PageDirty(page_tail));
 		BUG_ON(!PageSwapBacked(page_tail));
 
+		mem_cgroup_split_huge_fixup(page, page_tail);
+
 		lru_add_page_tail(zone, page, page_tail);
 	}
 
@@ -1837,9 +1839,9 @@ static void collapse_huge_page(struct mm_struct *mm,
 	spin_lock(ptl);
 	isolated = __collapse_huge_page_isolate(vma, address, pte);
 	spin_unlock(ptl);
-	pte_unmap(pte);
 
 	if (unlikely(!isolated)) {
+		pte_unmap(pte);
 		spin_lock(&mm->page_table_lock);
 		BUG_ON(!pmd_none(*pmd));
 		set_pmd_at(mm, address, pmd, _pmd);
@@ -1856,6 +1858,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	anon_vma_unlock(vma->anon_vma);
 
 	__collapse_huge_page_copy(pte, new_page, vma, address, ptl);
+	pte_unmap(pte);
 	__SetPageUptodate(new_page);
 	pgtable = pmd_pgtable(_pmd);
 	VM_BUG_ON(page_count(pgtable) != 1);
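The second and third hunks move pte_unmap() so the mapping returned by pte_offset_map() stays live until after __collapse_huge_page_copy() has read the PTEs; with highmem, that pointer is only valid between map and unmap. A rough userspace analogy of the lifetime rule (mmap/munmap standing in for the kernel's temporary PTE mapping):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

/* Userspace analogy for the pte_offset_map()/pte_unmap() lifetime rule
 * restored by the collapse_huge_page() hunks: a temporary mapping may
 * only be dereferenced between map and unmap, so the unmap was moved
 * past the last use instead of sitting before it. */
int main(void)
{
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	strcpy(p, "all accesses belong between map and unmap");
	puts(p);			/* last use of the mapping */

	munmap(p, 4096);		/* analogous to pte_unmap(pte) */
	/* puts(p); here would be the old bug in miniature: a use after
	 * the mapping has been torn down. */
	return 0;
}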
diff --git a/mm/memblock.c b/mm/memblock.c
index 400dc62697d7..bdba245d8afd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -683,13 +683,13 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
 
 int __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t size)
 {
-	int idx = memblock_search(&memblock.reserved, base);
+	int idx = memblock_search(&memblock.memory, base);
 
 	if (idx == -1)
 		return 0;
-	return memblock.reserved.regions[idx].base <= base &&
-		(memblock.reserved.regions[idx].base +
-		 memblock.reserved.regions[idx].size) >= (base + size);
+	return memblock.memory.regions[idx].base <= base &&
+		(memblock.memory.regions[idx].base +
+		 memblock.memory.regions[idx].size) >= (base + size);
 }
 
 int __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size)
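The memblock change is a straight bug fix: memblock_is_region_memory() was searching memblock.reserved rather than memblock.memory, so it tested containment against the wrong region table. A standalone sketch of the intended check, with simplified types and a hypothetical memory map (the real function binary-searches sorted regions):

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long long phys_addr_t;

struct region { phys_addr_t base, size; };

/* Simplified version of the fixed check: find the region containing
 * 'base' in the *memory* table, then verify [base, base + size) fits
 * entirely inside it. */
static bool is_region_memory(const struct region *mem, int cnt,
			     phys_addr_t base, phys_addr_t size)
{
	for (int i = 0; i < cnt; i++)	/* real code uses binary search */
		if (mem[i].base <= base &&
		    mem[i].base + mem[i].size >= base + size)
			return true;
	return false;
}

int main(void)
{
	/* Hypothetical e820-style memory map. */
	const struct region memory[] = { { 0x0, 0xa0000 },
					 { 0x100000, 0x7ff00000 } };

	printf("%d\n", is_region_memory(memory, 2, 0x200000, 0x1000)); /* 1 */
	printf("%d\n", is_region_memory(memory, 2, 0xb0000, 0x1000));  /* 0 */
	return 0;
}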
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8ab841031436..db76ef726293 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -600,23 +600,22 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem,
 }
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
-					 struct page_cgroup *pc,
-					 bool charge)
+					 bool file, int nr_pages)
 {
-	int val = (charge) ? 1 : -1;
-
 	preempt_disable();
 
-	if (PageCgroupCache(pc))
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], val);
+	if (file)
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], nr_pages);
 	else
-		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], val);
+		__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], nr_pages);
 
-	if (charge)
+	/* pagein of a big page is an event. So, ignore page size */
+	if (nr_pages > 0)
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
 	else
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
-	__this_cpu_inc(mem->stat->count[MEM_CGROUP_EVENTS]);
+
+	__this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);
 
 	preempt_enable();
 }
@@ -815,7 +814,8 @@ void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru)
 	 * removed from global LRU.
 	 */
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	VM_BUG_ON(list_empty(&pc->lru));
@@ -836,13 +836,12 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru)
 		return;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	/* unused or root page is not rotated. */
-	if (!PageCgroupUsed(pc) || mem_cgroup_is_root(pc->mem_cgroup))
+	if (!PageCgroupUsed(pc))
+		return;
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
+	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
 	mz = page_cgroup_zoneinfo(pc);
 	list_move(&pc->lru, &mz->lists[lru]);
@@ -857,16 +856,13 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru)
 		return;
 	pc = lookup_page_cgroup(page);
 	VM_BUG_ON(PageCgroupAcctLRU(pc));
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
-	MEM_CGROUP_ZSTAT(mz, lru) += 1;
+	/* huge page split is done under lru_lock. so, we have no races. */
+	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
 	SetPageCgroupAcctLRU(pc);
 	if (mem_cgroup_is_root(pc->mem_cgroup))
 		return;
@@ -1030,14 +1026,10 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page)
 		return NULL;
 
 	pc = lookup_page_cgroup(page);
-	/*
-	 * Used bit is set without atomic ops but after smp_wmb().
-	 * For making pc->mem_cgroup visible, insert smp_rmb() here.
-	 */
-	smp_rmb();
 	if (!PageCgroupUsed(pc))
 		return NULL;
-
+	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
+	smp_rmb();
 	mz = page_cgroup_zoneinfo(pc);
 	if (!mz)
 		return NULL;
@@ -1615,7 +1607,7 @@ void mem_cgroup_update_page_stat(struct page *page,
 	if (unlikely(!mem || !PageCgroupUsed(pc)))
 		goto out;
 	/* pc->mem_cgroup is unstable ? */
-	if (unlikely(mem_cgroup_stealed(mem))) {
+	if (unlikely(mem_cgroup_stealed(mem)) || PageTransHuge(page)) {
 		/* take a lock against to access pc->mem_cgroup */
 		move_lock_page_cgroup(pc, &flags);
 		need_unlock = true;
@@ -2084,14 +2076,27 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	return mem;
 }
 
-/*
- * commit a charge got by __mem_cgroup_try_charge() and makes page_cgroup to be
- * USED state. If already USED, uncharge and return.
- */
-static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
-					 struct page_cgroup *pc,
-					 enum charge_type ctype)
+static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
+				       struct page_cgroup *pc,
+				       enum charge_type ctype,
+				       int page_size)
 {
+	int nr_pages = page_size >> PAGE_SHIFT;
+
+	/* try_charge() can return NULL to *memcg, taking care of it. */
+	if (!mem)
+		return;
+
+	lock_page_cgroup(pc);
+	if (unlikely(PageCgroupUsed(pc))) {
+		unlock_page_cgroup(pc);
+		mem_cgroup_cancel_charge(mem, page_size);
+		return;
+	}
+	/*
+	 * we don't need page_cgroup_lock about tail pages, becase they are not
+	 * accessed by any other context at this point.
+	 */
 	pc->mem_cgroup = mem;
 	/*
 	 * We access a page_cgroup asynchronously without lock_page_cgroup().
@@ -2115,35 +2120,7 @@ static void ____mem_cgroup_commit_charge(struct mem_cgroup *mem,
 		break;
 	}
 
-	mem_cgroup_charge_statistics(mem, pc, true);
-}
-
-static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
-				       struct page_cgroup *pc,
-				       enum charge_type ctype,
-				       int page_size)
-{
-	int i;
-	int count = page_size >> PAGE_SHIFT;
-
-	/* try_charge() can return NULL to *memcg, taking care of it. */
-	if (!mem)
-		return;
-
-	lock_page_cgroup(pc);
-	if (unlikely(PageCgroupUsed(pc))) {
-		unlock_page_cgroup(pc);
-		mem_cgroup_cancel_charge(mem, page_size);
-		return;
-	}
-
-	/*
-	 * we don't need page_cgroup_lock about tail pages, becase they are not
-	 * accessed by any other context at this point.
-	 */
-	for (i = 0; i < count; i++)
-		____mem_cgroup_commit_charge(mem, pc + i, ctype);
-
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), nr_pages);
 	unlock_page_cgroup(pc);
 	/*
 	 * "charge_statistics" updated event counter. Then, check it.
@@ -2153,6 +2130,46 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 	memcg_check_events(mem, pc->page);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
+			(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+/*
+ * Because tail pages are not marked as "used", set it. We're under
+ * zone->lru_lock, 'splitting on pmd' and compund_lock.
+ */
+void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail)
+{
+	struct page_cgroup *head_pc = lookup_page_cgroup(head);
+	struct page_cgroup *tail_pc = lookup_page_cgroup(tail);
+	unsigned long flags;
+
+	/*
+	 * We have no races with charge/uncharge but will have races with
+	 * page state accounting.
+	 */
+	move_lock_page_cgroup(head_pc, &flags);
+
+	tail_pc->mem_cgroup = head_pc->mem_cgroup;
+	smp_wmb(); /* see __commit_charge() */
+	if (PageCgroupAcctLRU(head_pc)) {
+		enum lru_list lru;
+		struct mem_cgroup_per_zone *mz;
+
+		/*
+		 * LRU flags cannot be copied because we need to add tail
+		 * page to LRU by generic call and our hook will be called.
+		 * We hold lru_lock, then, reduce counter directly.
+		 */
+		lru = page_lru(head);
+		mz = page_cgroup_zoneinfo(head_pc);
+		MEM_CGROUP_ZSTAT(mz, lru) -= 1;
+	}
+	tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+	move_unlock_page_cgroup(head_pc, &flags);
+}
+#endif
+
 /**
  * __mem_cgroup_move_account - move account of the page
  * @pc: page_cgroup of the page.
@@ -2171,8 +2188,11 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
  */
 
 static void __mem_cgroup_move_account(struct page_cgroup *pc,
-	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge,
+	int charge_size)
 {
+	int nr_pages = charge_size >> PAGE_SHIFT;
+
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
 	VM_BUG_ON(!page_is_cgroup_locked(pc));
@@ -2186,14 +2206,14 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
 		__this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
 		preempt_enable();
 	}
-	mem_cgroup_charge_statistics(from, pc, false);
+	mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages);
 	if (uncharge)
 		/* This is not "cancel", but cancel_charge does all we need. */
-		mem_cgroup_cancel_charge(from, PAGE_SIZE);
+		mem_cgroup_cancel_charge(from, charge_size);
 
 	/* caller should have done css_get */
 	pc->mem_cgroup = to;
-	mem_cgroup_charge_statistics(to, pc, true);
+	mem_cgroup_charge_statistics(to, PageCgroupCache(pc), nr_pages);
 	/*
 	 * We charges against "to" which may not have any tasks. Then, "to"
 	 * can be under rmdir(). But in current implementation, caller of
@@ -2208,15 +2228,19 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
  * __mem_cgroup_move_account()
 */
 static int mem_cgroup_move_account(struct page_cgroup *pc,
-	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
+	struct mem_cgroup *from, struct mem_cgroup *to,
+	bool uncharge, int charge_size)
 {
 	int ret = -EINVAL;
 	unsigned long flags;
 
+	if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page))
+		return -EBUSY;
+
 	lock_page_cgroup(pc);
 	if (PageCgroupUsed(pc) && pc->mem_cgroup == from) {
 		move_lock_page_cgroup(pc, &flags);
-		__mem_cgroup_move_account(pc, from, to, uncharge);
+		__mem_cgroup_move_account(pc, from, to, uncharge, charge_size);
 		move_unlock_page_cgroup(pc, &flags);
 		ret = 0;
 	}
@@ -2241,6 +2265,8 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 	struct cgroup *cg = child->css.cgroup;
 	struct cgroup *pcg = cg->parent;
 	struct mem_cgroup *parent;
+	int charge = PAGE_SIZE;
+	unsigned long flags;
 	int ret;
 
 	/* Is ROOT ? */
@@ -2252,17 +2278,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc,
 		goto out;
 	if (isolate_lru_page(page))
 		goto put;
+	/* The page is isolated from LRU and we have no race with splitting */
+	charge = PAGE_SIZE << compound_order(page);
 
 	parent = mem_cgroup_from_cont(pcg);
-	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false,
-				      PAGE_SIZE);
+	ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, charge);
 	if (ret || !parent)
 		goto put_back;
 
-	ret = mem_cgroup_move_account(pc, child, parent, true);
+	if (charge > PAGE_SIZE)
+		flags = compound_lock_irqsave(page);
+
+	ret = mem_cgroup_move_account(pc, child, parent, true, charge);
 	if (ret)
-		mem_cgroup_cancel_charge(parent, PAGE_SIZE);
+		mem_cgroup_cancel_charge(parent, charge);
 put_back:
+	if (charge > PAGE_SIZE)
+		compound_unlock_irqrestore(page, flags);
 	putback_lru_page(page);
 put:
 	put_page(page);
@@ -2546,7 +2578,6 @@ direct_uncharge:
 static struct mem_cgroup *
 __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 {
-	int i;
 	int count;
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem = NULL;
@@ -2596,8 +2627,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		break;
 	}
 
-	for (i = 0; i < count; i++)
-		mem_cgroup_charge_statistics(mem, pc + i, false);
+	mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count);
 
 	ClearPageCgroupUsed(pc);
 	/*
@@ -4844,7 +4874,7 @@ retry:
 			goto put;
 		pc = lookup_page_cgroup(page);
 		if (!mem_cgroup_move_account(pc,
-					mc.from, mc.to, false)) {
+					mc.from, mc.to, false, PAGE_SIZE)) {
 			mc.precharge--;
 			/* we uncharge from mc.from later. */
 			mc.moved_charge++;
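The common thread in the memcontrol.c hunks above is that charge statistics and LRU accounting now take a signed page count (nr_pages, typically 1 << compound_order(page)), so a transparent huge page is charged, moved, or uncharged in one batched update instead of a per-subpage loop. A toy sketch of that signed-batch pattern, with plain globals standing in for the kernel's per-CPU counters:

#include <stdio.h>

/* Toy stand-ins for the memcg counters touched by
 * mem_cgroup_charge_statistics(); the kernel uses per-CPU arrays. */
static long stat_cache, stat_rss, pgpgin, pgpgout, events;

/* Signed-batch accounting, as in the reworked helper: nr_pages > 0
 * charges, nr_pages < 0 uncharges, and a compound page passes its full
 * subpage count in one call instead of looping per tail page. */
static void charge_statistics(int file, int nr_pages)
{
	if (file)
		stat_cache += nr_pages;
	else
		stat_rss += nr_pages;

	/* pagein of a big page counts as one event; page size is ignored */
	if (nr_pages > 0)
		pgpgin++;
	else
		pgpgout++;
	events += nr_pages;
}

int main(void)
{
	charge_statistics(0, 512);   /* charge a 2MB anon huge page */
	charge_statistics(1, 1);     /* charge one 4KB page-cache page */
	charge_statistics(0, -512);  /* uncharge the huge page */

	printf("rss=%ld cache=%ld in=%ld out=%ld events=%ld\n",
	       stat_rss, stat_cache, pgpgin, pgpgout, events);
	return 0;
}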
diff --git a/mm/truncate.c b/mm/truncate.c
index 3c2d5ddfa0d4..49feb46e77b8 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -549,13 +549,12 @@ EXPORT_SYMBOL(truncate_pagecache);
  * @inode:	inode
  * @newsize:	new file size
  *
- * truncate_setsize updastes i_size update and performs pagecache
- * truncation (if necessary) for a file size updates. It will be
- * typically be called from the filesystem's setattr function when
- * ATTR_SIZE is passed in.
+ * truncate_setsize updates i_size and performs pagecache truncation (if
+ * necessary) to @newsize. It will be typically be called from the filesystem's
+ * setattr function when ATTR_SIZE is passed in.
 *
- * Must be called with inode_mutex held and after all filesystem
- * specific block truncation has been performed.
+ * Must be called with inode_mutex held and before all filesystem specific
+ * block truncation has been performed.
 */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 47a50962ce81..f5d90dedebba 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -41,7 +41,6 @@
 #include <linux/memcontrol.h>
 #include <linux/delayacct.h>
 #include <linux/sysctl.h>
-#include <linux/compaction.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>