about summary refs log tree commit diff stats
path: root/mm/memory.c
diff options
context:
space:
mode:
author    Balbir Singh <balbir@linux.vnet.ibm.com>    2008-02-07 03:13:53 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org>    2008-02-07 11:42:18 -0500
commit    8a9f3ccd24741b50200c3f33d62534c7271f3dfc (patch)
tree      066aabd8d2952299501f067a91cbfd6f47ee62f6 /mm/memory.c
parent    78fb74669e80883323391090e4d26d17fe29488f (diff)
Memory controller: memory accounting
Add the accounting hooks.  The accounting is carried out for RSS and Page
Cache (unmapped) pages.  There is now a common limit and accounting for both.
The RSS accounting is accounted at page_add_*_rmap() and page_remove_rmap()
time.  Page cache is accounted at add_to_page_cache(),
__delete_from_page_cache().  Swap cache is also accounted for.

Each page's page_cgroup is protected with the last bit of the page_cgroup
pointer, this makes handling of race conditions involving simultaneous
mappings of a page easier.  A reference count is kept in the page_cgroup to
deal with cases where a page might be unmapped from the RSS of all tasks, but
still lives in the page cache.

Credits go to Vaidyanathan Srinivasan for helping with reference counting
work of the page cgroup.  Almost all of the page cache accounting code has
help from Vaidyanathan Srinivasan.

[hugh@veritas.com: fix swapoff breakage]
[akpm@linux-foundation.org: fix locking]
Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: <Valdis.Kletnieks@vt.edu>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r--    mm/memory.c    47
1 file changed, 42 insertions(+), 5 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 9d073fa0a2d0..0ba224ea6ba4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -50,6 +50,7 @@
 #include <linux/delayacct.h>
 #include <linux/init.h>
 #include <linux/writeback.h>
+#include <linux/memcontrol.h>
 
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
@@ -1144,16 +1145,20 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
 {
 	int retval;
 	pte_t *pte;
 	spinlock_t *ptl;
+
+	retval = mem_cgroup_charge(page, mm);
+	if (retval)
+		goto out;
 
 	retval = -EINVAL;
 	if (PageAnon(page))
-		goto out;
+		goto out_uncharge;
 	retval = -ENOMEM;
 	flush_dcache_page(page);
 	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
-		goto out;
+		goto out_uncharge;
 	retval = -EBUSY;
 	if (!pte_none(*pte))
 		goto out_unlock;
@@ -1165,8 +1170,12 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
 	retval = 0;
+	pte_unmap_unlock(pte, ptl);
+	return retval;
 out_unlock:
 	pte_unmap_unlock(pte, ptl);
+out_uncharge:
+	mem_cgroup_uncharge_page(page);
 out:
 	return retval;
 }
@@ -1641,6 +1650,9 @@ gotten:
 	cow_user_page(new_page, old_page, address, vma);
 	__SetPageUptodate(new_page);
 
+	if (mem_cgroup_charge(new_page, mm))
+		goto oom_free_new;
+
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
@@ -1672,7 +1684,9 @@ gotten:
 		/* Free the old page.. */
 		new_page = old_page;
 		ret |= VM_FAULT_WRITE;
-	}
+	} else
+		mem_cgroup_uncharge_page(new_page);
+
 	if (new_page)
 		page_cache_release(new_page);
 	if (old_page)
@@ -1696,6 +1710,8 @@ unlock:
 			put_page(dirty_page);
 	}
 	return ret;
+oom_free_new:
+	__free_page(new_page);
 oom:
 	if (old_page)
 		page_cache_release(old_page);
@@ -2036,6 +2052,12 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		count_vm_event(PGMAJFAULT);
 	}
 
+	if (mem_cgroup_charge(page, mm)) {
+		delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+		ret = VM_FAULT_OOM;
+		goto out;
+	}
+
 	mark_page_accessed(page);
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -2073,8 +2095,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (write_access) {
 		/* XXX: We could OR the do_wp_page code with this one? */
 		if (do_wp_page(mm, vma, address,
-				page_table, pmd, ptl, pte) & VM_FAULT_OOM)
+				page_table, pmd, ptl, pte) & VM_FAULT_OOM) {
+			mem_cgroup_uncharge_page(page);
 			ret = VM_FAULT_OOM;
+		}
 		goto out;
 	}
 
@@ -2085,6 +2109,7 @@ unlock:
 out:
 	return ret;
 out_nomap:
+	mem_cgroup_uncharge_page(page);
 	pte_unmap_unlock(page_table, ptl);
 	unlock_page(page);
 	page_cache_release(page);
@@ -2114,6 +2139,9 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto oom;
 	__SetPageUptodate(page);
 
+	if (mem_cgroup_charge(page, mm))
+		goto oom_free_page;
+
 	entry = mk_pte(page, vma->vm_page_prot);
 	entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 
@@ -2131,8 +2159,11 @@ unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return 0;
 release:
+	mem_cgroup_uncharge_page(page);
 	page_cache_release(page);
 	goto unlock;
+oom_free_page:
+	__free_page(page);
 oom:
 	return VM_FAULT_OOM;
 }
@@ -2246,6 +2277,11 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	}
 
+	if (mem_cgroup_charge(page, mm)) {
+		ret = VM_FAULT_OOM;
+		goto out;
+	}
+
 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
 
 	/*
@@ -2281,6 +2317,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* no need to invalidate: a not-present page won't be cached */
 		update_mmu_cache(vma, address, entry);
 	} else {
+		mem_cgroup_uncharge_page(page);
 		if (anon)
 			page_cache_release(page);
 		else