diff options
author | Balbir Singh <balbir@linux.vnet.ibm.com> | 2008-02-07 03:14:01 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-07 11:42:19 -0500 |
commit | bed7161a519a2faef53e1bce1b47595e297c1d14 (patch) | |
tree | fbc0541340465f7d83221b829a9382cac2855916 /mm | |
parent | 8697d33194faae6fdd6b2e799f6308aa00cfdf67 (diff) |
Memory controller: make page_referenced() cgroup aware
Make page_referenced() cgroup aware. Without this patch, page_referenced()
can cause a page to be skipped while reclaiming pages. This patch ensures
that other cgroups do not hold pages in a particular cgroup hostage. It
is required to ensure that shared pages are freed from a cgroup when they
are not actively referenced from the cgroup that brought them in.
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 5 | ||||
-rw-r--r-- | mm/rmap.c | 30 | ||||
-rw-r--r-- | mm/vmscan.c | 4 |
3 files changed, 31 insertions, 8 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 10833d969e3f..ff7cac602984 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -110,6 +110,11 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) | |||
110 | struct mem_cgroup, css); | 110 | struct mem_cgroup, css); |
111 | } | 111 | } |
112 | 112 | ||
113 | inline struct mem_cgroup *mm_cgroup(struct mm_struct *mm) | ||
114 | { | ||
115 | return rcu_dereference(mm->mem_cgroup); | ||
116 | } | ||
117 | |||
113 | void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) | 118 | void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p) |
114 | { | 119 | { |
115 | struct mem_cgroup *mem; | 120 | struct mem_cgroup *mem; |
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -302,7 +302,8 @@ out: | |||
302 | return referenced; | 302 | return referenced; |
303 | } | 303 | } |
304 | 304 | ||
305 | static int page_referenced_anon(struct page *page) | 305 | static int page_referenced_anon(struct page *page, |
306 | struct mem_cgroup *mem_cont) | ||
306 | { | 307 | { |
307 | unsigned int mapcount; | 308 | unsigned int mapcount; |
308 | struct anon_vma *anon_vma; | 309 | struct anon_vma *anon_vma; |
@@ -315,6 +316,13 @@ static int page_referenced_anon(struct page *page) | |||
315 | 316 | ||
316 | mapcount = page_mapcount(page); | 317 | mapcount = page_mapcount(page); |
317 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { | 318 | list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { |
319 | /* | ||
320 | * If we are reclaiming on behalf of a cgroup, skip | ||
321 | * counting on behalf of references from different | ||
322 | * cgroups | ||
323 | */ | ||
324 | if (mem_cont && (mm_cgroup(vma->vm_mm) != mem_cont)) | ||
325 | continue; | ||
318 | referenced += page_referenced_one(page, vma, &mapcount); | 326 | referenced += page_referenced_one(page, vma, &mapcount); |
319 | if (!mapcount) | 327 | if (!mapcount) |
320 | break; | 328 | break; |
@@ -335,7 +343,8 @@ static int page_referenced_anon(struct page *page) | |||
335 | * | 343 | * |
336 | * This function is only called from page_referenced for object-based pages. | 344 | * This function is only called from page_referenced for object-based pages. |
337 | */ | 345 | */ |
338 | static int page_referenced_file(struct page *page) | 346 | static int page_referenced_file(struct page *page, |
347 | struct mem_cgroup *mem_cont) | ||
339 | { | 348 | { |
340 | unsigned int mapcount; | 349 | unsigned int mapcount; |
341 | struct address_space *mapping = page->mapping; | 350 | struct address_space *mapping = page->mapping; |
@@ -368,6 +377,13 @@ static int page_referenced_file(struct page *page) | |||
368 | mapcount = page_mapcount(page); | 377 | mapcount = page_mapcount(page); |
369 | 378 | ||
370 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { | 379 | vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) { |
380 | /* | ||
381 | * If we are reclaiming on behalf of a cgroup, skip | ||
382 | * counting on behalf of references from different | ||
383 | * cgroups | ||
384 | */ | ||
385 | if (mem_cont && (mm_cgroup(vma->vm_mm) != mem_cont)) | ||
386 | continue; | ||
371 | if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) | 387 | if ((vma->vm_flags & (VM_LOCKED|VM_MAYSHARE)) |
372 | == (VM_LOCKED|VM_MAYSHARE)) { | 388 | == (VM_LOCKED|VM_MAYSHARE)) { |
373 | referenced++; | 389 | referenced++; |
@@ -390,7 +406,8 @@ static int page_referenced_file(struct page *page) | |||
390 | * Quick test_and_clear_referenced for all mappings to a page, | 406 | * Quick test_and_clear_referenced for all mappings to a page, |
391 | * returns the number of ptes which referenced the page. | 407 | * returns the number of ptes which referenced the page. |
392 | */ | 408 | */ |
393 | int page_referenced(struct page *page, int is_locked) | 409 | int page_referenced(struct page *page, int is_locked, |
410 | struct mem_cgroup *mem_cont) | ||
394 | { | 411 | { |
395 | int referenced = 0; | 412 | int referenced = 0; |
396 | 413 | ||
@@ -402,14 +419,15 @@ int page_referenced(struct page *page, int is_locked) | |||
402 | 419 | ||
403 | if (page_mapped(page) && page->mapping) { | 420 | if (page_mapped(page) && page->mapping) { |
404 | if (PageAnon(page)) | 421 | if (PageAnon(page)) |
405 | referenced += page_referenced_anon(page); | 422 | referenced += page_referenced_anon(page, mem_cont); |
406 | else if (is_locked) | 423 | else if (is_locked) |
407 | referenced += page_referenced_file(page); | 424 | referenced += page_referenced_file(page, mem_cont); |
408 | else if (TestSetPageLocked(page)) | 425 | else if (TestSetPageLocked(page)) |
409 | referenced++; | 426 | referenced++; |
410 | else { | 427 | else { |
411 | if (page->mapping) | 428 | if (page->mapping) |
412 | referenced += page_referenced_file(page); | 429 | referenced += |
430 | page_referenced_file(page, mem_cont); | ||
413 | unlock_page(page); | 431 | unlock_page(page); |
414 | } | 432 | } |
415 | } | 433 | } |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 7408a8a7d882..215f6a726b2f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -503,7 +503,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, | |||
503 | goto keep_locked; | 503 | goto keep_locked; |
504 | } | 504 | } |
505 | 505 | ||
506 | referenced = page_referenced(page, 1); | 506 | referenced = page_referenced(page, 1, sc->mem_cgroup); |
507 | /* In active use or really unfreeable? Activate it. */ | 507 | /* In active use or really unfreeable? Activate it. */ |
508 | if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && | 508 | if (sc->order <= PAGE_ALLOC_COSTLY_ORDER && |
509 | referenced && page_mapping_inuse(page)) | 509 | referenced && page_mapping_inuse(page)) |
@@ -1057,7 +1057,7 @@ force_reclaim_mapped: | |||
1057 | if (page_mapped(page)) { | 1057 | if (page_mapped(page)) { |
1058 | if (!reclaim_mapped || | 1058 | if (!reclaim_mapped || |
1059 | (total_swap_pages == 0 && PageAnon(page)) || | 1059 | (total_swap_pages == 0 && PageAnon(page)) || |
1060 | page_referenced(page, 0)) { | 1060 | page_referenced(page, 0, sc->mem_cgroup)) { |
1061 | list_add(&page->lru, &l_active); | 1061 | list_add(&page->lru, &l_active); |
1062 | continue; | 1062 | continue; |
1063 | } | 1063 | } |