diff options
author | Balbir Singh <balbir@linux.vnet.ibm.com> | 2008-02-07 03:14:01 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-07 11:42:19 -0500 |
commit | bed7161a519a2faef53e1bce1b47595e297c1d14 (patch) | |
tree | fbc0541340465f7d83221b829a9382cac2855916 | |
parent | 8697d33194faae6fdd6b2e799f6308aa00cfdf67 (diff) |
Memory controller: make page_referenced() cgroup aware
Make page_referenced() cgroup aware. Without this patch, page_referenced()
can cause a page to be skipped while reclaiming pages. This patch ensures
that other cgroups do not hold pages in a particular cgroup hostage. It
is required to ensure that shared pages are freed from a cgroup when they
are not actively referenced from the cgroup that brought them in.
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/memcontrol.h | 6 | ||||
-rw-r--r-- | include/linux/rmap.h | 5 | ||||
-rw-r--r-- | mm/memcontrol.c | 5 | ||||
-rw-r--r-- | mm/rmap.c | 30 | ||||
-rw-r--r-- | mm/vmscan.c | 4 |
5 files changed, 40 insertions, 10 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bb6f5105401b..9d0a830423b6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -43,6 +43,7 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
43 | int active); | 43 | int active); |
44 | extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); | 44 | extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); |
45 | extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm); | 45 | extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm); |
46 | extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm); | ||
46 | 47 | ||
47 | static inline void mem_cgroup_uncharge_page(struct page *page) | 48 | static inline void mem_cgroup_uncharge_page(struct page *page) |
48 | { | 49 | { |
@@ -93,6 +94,11 @@ static inline int mem_cgroup_cache_charge(struct page *page, | |||
93 | return 0; | 94 | return 0; |
94 | } | 95 | } |
95 | 96 | ||
97 | static inline struct mem_cgroup *mm_cgroup(struct mm_struct *mm) | ||
98 | { | ||
99 | return NULL; | ||
100 | } | ||
101 | |||
96 | #endif /* CONFIG_CGROUP_MEM_CONT */ | 102 | #endif /* CONFIG_CGROUP_MEM_CONT */ |
97 | 103 | ||
98 | #endif /* _LINUX_MEMCONTROL_H */ | 104 | #endif /* _LINUX_MEMCONTROL_H */ |
diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 97347f22fc20..1383692ac5bd 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/spinlock.h> | 10 | #include <linux/spinlock.h> |
11 | #include <linux/memcontrol.h> | ||
11 | 12 | ||
12 | /* | 13 | /* |
13 | * The anon_vma heads a list of private "related" vmas, to scan if | 14 | * The anon_vma heads a list of private "related" vmas, to scan if |
@@ -86,7 +87,7 @@ static inline void page_dup_rmap(struct page *page, struct vm_area_struct *vma, | |||
86 | /* | 87 | /* |
87 | * Called from mm/vmscan.c to handle paging out | 88 | * Called from mm/vmscan.c to handle paging out |
88 | */ | 89 | */ |
89 | int page_referenced(struct page *, int is_locked); | 90 | int page_referenced(struct page *, int is_locked, struct mem_cgroup *cnt); |
90 | int try_to_unmap(struct page *, int ignore_refs); | 91 | int try_to_unmap(struct page *, int ignore_refs); |
91 | 92 | ||
92 | /* | 93 | /* |
@@ -114,7 +115,7 @@ int page_mkclean(struct page *); | |||
114 | #define anon_vma_prepare(vma) (0) | 115 | #define anon_vma_prepare(vma) (0) |
115 | #define anon_vma_link(vma) do {} while (0) | 116 | #define anon_vma_link(vma) do {} while (0) |
116 | 117 | ||
117 | #define page_referenced(page,l) TestClearPageReferenced(page) | 118 | #define page_referenced(page,l,cnt) TestClearPageReferenced(page) |
118 | #define try_to_unmap(page, refs) SWAP_FAIL | 119 | #define try_to_unmap(page, refs) SWAP_FAIL |
119 | 120 | ||
120 | static inline int page_mkclean(struct page *page) | 121 | static inline int page_mkclean(struct page *page) |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 10833d969e3f..ff7cac602984 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -110,6 +110,11 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) | |||
110 | struct mem_cgroup, css); | 110 | struct mem_cgroup, css); |
111 | } | 111 | } |
112 | 112 | ||
113 | inline struct mem_cgroup *mm_cgroup(struct mm_struct *mm) | ||
114 | { | ||
115 | return rcu_dereference(mm->mem_cgroup); | ||
116 | } | ||
117 | |||
113 | void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) | 118 | void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) |
114 | { | 119 | { |
115 | struct mem_cgroup *mem; | 120 | struct mem_cgroup *mem; |
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -302,7 +302,8 @@ out: | |||
302 | return referenced; | 302 | return referenced; |
303 | } | 303 | } |
304 | 304 | ||
305 | static int page_referenced_anon(struct page *page) | 305 | static int page_referenced_anon(struct page *page, |
306 | struct mem_cgroup *mem_cont) | ||
306 | { | 307 | { |
307 | unsigned int mapcount; | 308 | unsigned int mapcount; |
308 | struct anon_vma *anon_vma; | 309 | struct anon_vma *anon_vma; |
@@ -315,6 +316,13 @@ static int page_referenced_anon(struct page *page) | |||
315 | 316 | ||
316 | mapcount = page_mapcount(page); | 317 | mapcount = page_mapcount(page); |
317 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { | 318 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { |
319 | /* | ||
320 | * If we are reclaiming on behalf of a cgroup, skip | ||
321 | * counting on behalf of references from different | ||
322 | * cgroups | ||
323 | */ | ||
324 | if (mem_cont && (mm_cgroup(vma->vm_mm) != mem_cont)) | ||
325 | continue; | ||
318 | referenced += page_referenced_one(page, vma, &mapcount); | 326 | referenced += page_referenced_one(page, vma, &mapcount); |
319 | if (!mapcount) | 327 | if (!mapcount) |
320 | break; | 328 | break; |
@@ -335,7 +343,8 @@ static int page_referenced_anon(struct page *page) | |||
335 | * | 343 | * |
336 | * This function is only called from page_referenced for object-based pages. | 344 | * This function is only called from page_referenced for object-based pages. |
337 | */ | 345 | */ |
338 | static int page_referenced_file(struct page *page) | 346 | static int page_referenced_file(struct page *page, |
347 | struct mem_cgroup *mem_cont) | ||
339 | { | 348 | { |
340 | unsigned int mapcount; | 349 | unsigned int mapcount; |
341 | struct address_space *mapping = page->mapping; | 350 | struct address_space *mapping = page->mapping; |
@@ -368,6 +377,13 @@ static int page_referenced_file(struct page *page) | |||
368 | mapcount = page_mapcount(page); | 377 | mapcount = page_mapcount(page); |
369 | 378 | ||
370 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { | 379 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { |
380 | /* | ||
381 | * If we are reclaiming on behalf of a cgroup, skip | ||
382 | * counting on behalf of references from different | ||
383 | * cgroups | ||
384 | */ | ||
385 | if (mem_cont && (mm_cgroup(vma->vm_mm) != mem_cont)) | ||
386 | continue; | ||
371 | if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) | 387 | if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) |
372 | == (VM_LOCKED|VM_MAYSHARE)) { | 388 | == (VM_LOCKED|VM_MAYSHARE)) { |
373 | referenced++; | 389 | referenced++; |
@@ -390,7 +406,8 @@ static int page_referenced_file(struct page *page) | |||
390 | * Quick test_and_clear_referenced for all mappings to a page, | 406 | * Quick test_and_clear_referenced for all mappings to a page, |
391 | * returns the number of ptes which referenced the page. | 407 | * returns the number of ptes which referenced the page. |
392 | */ | 408 | */ |
393 | int page_referenced(struct page *page, int is_locked) | 409 | int page_referenced(struct page *page, int is_locked, |
410 | struct mem_cgroup *mem_cont) | ||
394 | { | 411 | { |
395 | int referenced = 0; | 412 | int referenced = 0; |
396 | 413 | ||
@@ -402,14 +419,15 @@ int page_referenced(struct page *page, int is_locked) | |||
402 | 419 | ||
403 | if (page_mapped(page) && page->mapping) { | 420 | if (page_mapped(page) && page->mapping) { |
404 | if (PageAnon(page)) | 421 | if (PageAnon(page)) |
405 | referenced += page_referenced_anon(page); | 422 | referenced += page_referenced_anon(page, mem_cont); |
406 | else if (is_locked) | 423 | else if (is_locked) |
407 | referenced += page_referenced_file(page); | 424 | referenced += page_referenced_file(page, mem_cont); |
408 | else if (TestSetPageLocked(page)) | 425 | else if (TestSetPageLocked(page)) |
409 | referenced++; | 426 | referenced++; |
410 | else { | 427 | else { |
411 | if (page->mapping) | 428 | if (page->mapping) |
412 | referenced += page_referenced_file(page); | 429 | referenced += |
430 | page_referenced_file(page, mem_cont); | ||
413 | unlock_page(page); | 431 | unlock_page(page); |
414 | } | 432 | } |
415 | } | 433 | } |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 7408a8a7d882..215f6a726b2f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -503,7 +503,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
503 | goto keep_locked; | 503 | goto keep_locked; |
504 | } | 504 | } |
505 | 505 | ||
506 | referenced = page_referenced(page, 1); | 506 | referenced = page_referenced(page, 1, sc->mem_cgroup); |
507 | /* In active use or really unfreeable? Activate it. */ | 507 | /* In active use or really unfreeable? Activate it. */ |
508 | if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && | 508 | if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && |
509 | referenced && page_mapping_inuse(page)) | 509 | referenced && page_mapping_inuse(page)) |
@@ -1057,7 +1057,7 @@ force_reclaim_mapped: | |||
1057 | if (page_mapped(page)) { | 1057 | if (page_mapped(page)) { |
1058 | if (!reclaim_mapped || | 1058 | if (!reclaim_mapped || |
1059 | (total_swap_pages == 0 && PageAnon(page)) || | 1059 | (total_swap_pages == 0 && PageAnon(page)) || |
1060 | page_referenced(page, 0)) { | 1060 | page_referenced(page, 0, sc->mem_cgroup)) { |
1061 | list_add(&page->lru, &l_active); | 1061 | list_add(&page->lru, &l_active); |
1062 | continue; | 1062 | continue; |
1063 | } | 1063 | } |