author		Balbir Singh <balbir@linux.vnet.ibm.com>	2008-02-07 03:13:53 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-02-07 11:42:18 -0500
commit		8a9f3ccd24741b50200c3f33d62534c7271f3dfc
tree		066aabd8d2952299501f067a91cbfd6f47ee62f6	/mm/swapfile.c
parent		78fb74669e80883323391090e4d26d17fe29488f
Memory controller: memory accounting
Add the accounting hooks.  The accounting is carried out for RSS and Page
Cache (unmapped) pages.  There is now a common limit and accounting for both.
RSS is accounted at page_add_*_rmap() and page_remove_rmap() time.  Page cache
is accounted at add_to_page_cache(), __delete_from_page_cache().  Swap cache
is also accounted for.

Each page's page_cgroup is protected with the last bit of the page_cgroup
pointer; this makes it easier to handle race conditions involving simultaneous
mappings of a page.  A reference count is kept in the page_cgroup to deal with
cases where a page might be unmapped from the RSS of all tasks, but still
lives in the page cache.

Credits go to Vaidyanathan Srinivasan for helping with the reference counting
work of the page_cgroup.  Almost all of the page cache accounting code had
help from Vaidyanathan Srinivasan.

[hugh@veritas.com: fix swapoff breakage]
[akpm@linux-foundation.org: fix locking]
Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: <Valdis.Kletnieks@vt.edu>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
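The pointer-bit locking mentioned above (the low bit of the page_cgroup pointer doubles as a per-page lock, with a reference count kept in the page_cgroup itself) can be sketched in plain C.  This is an illustrative userspace model only, not the kernel's implementation: the struct layout and the helper names lock_page_cgroup(), unlock_page_cgroup() and page_get_page_cgroup() are stand-ins used here just to show how one machine word can carry both a pointer and a lock bit.

/* Illustrative userspace sketch of the pointer-bit lock, not the kernel code. */
#include <stdint.h>
#include <stdatomic.h>

#define PCG_LOCK_BIT	0x1UL			/* lowest bit of the pointer word */

struct page_cgroup;				/* hypothetical per-page accounting record */

struct page_stub {
	_Atomic uintptr_t page_cgroup;		/* pointer value with the lock bit folded in */
};

static void lock_page_cgroup(struct page_stub *page)
{
	uintptr_t old;

	/* Spin until we observe the lock bit clear and atomically set it. */
	do {
		old = atomic_load(&page->page_cgroup) & ~PCG_LOCK_BIT;
	} while (!atomic_compare_exchange_weak(&page->page_cgroup, &old,
					       old | PCG_LOCK_BIT));
}

static void unlock_page_cgroup(struct page_stub *page)
{
	atomic_fetch_and(&page->page_cgroup, ~PCG_LOCK_BIT);
}

static struct page_cgroup *page_get_page_cgroup(struct page_stub *page)
{
	/* Mask the lock bit off to recover the real pointer. */
	return (struct page_cgroup *)(atomic_load(&page->page_cgroup) & ~PCG_LOCK_BIT);
}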
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--	mm/swapfile.c	41
1 file changed, 24 insertions, 17 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index afae7b1f680b..fddc4cc4149b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -27,6 +27,7 @@
 #include <linux/mutex.h>
 #include <linux/capability.h>
 #include <linux/syscalls.h>
+#include <linux/memcontrol.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -506,9 +507,12 @@ unsigned int count_swap_pages(int type, int free)
  * just let do_wp_page work it out if a write is requested later - to
  * force COW, vm_page_prot omits write permission from any private vma.
  */
-static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
+static int unuse_pte(struct vm_area_struct *vma, pte_t *pte,
 		unsigned long addr, swp_entry_t entry, struct page *page)
 {
+	if (mem_cgroup_charge(page, vma->vm_mm))
+		return -ENOMEM;
+
 	inc_mm_counter(vma->vm_mm, anon_rss);
 	get_page(page);
 	set_pte_at(vma->vm_mm, addr, pte,
@@ -520,6 +524,7 @@ static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
 	 * immediately swapped out again after swapon.
 	 */
 	activate_page(page);
+	return 1;
 }
 
 static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -529,7 +534,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 	pte_t swp_pte = swp_entry_to_pte(entry);
 	pte_t *pte;
 	spinlock_t *ptl;
-	int found = 0;
+	int ret = 0;
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	do {
@@ -538,13 +543,12 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		 * Test inline before going to call unuse_pte.
 		 */
 		if (unlikely(pte_same(*pte, swp_pte))) {
-			unuse_pte(vma, pte++, addr, entry, page);
-			found = 1;
+			ret = unuse_pte(vma, pte++, addr, entry, page);
 			break;
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	pte_unmap_unlock(pte - 1, ptl);
-	return found;
+	return ret;
 }
 
 static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
@@ -553,14 +557,16 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud,
553{ 557{
554 pmd_t *pmd; 558 pmd_t *pmd;
555 unsigned long next; 559 unsigned long next;
560 int ret;
556 561
557 pmd = pmd_offset(pud, addr); 562 pmd = pmd_offset(pud, addr);
558 do { 563 do {
559 next = pmd_addr_end(addr, end); 564 next = pmd_addr_end(addr, end);
560 if (pmd_none_or_clear_bad(pmd)) 565 if (pmd_none_or_clear_bad(pmd))
561 continue; 566 continue;
562 if (unuse_pte_range(vma, pmd, addr, next, entry, page)) 567 ret = unuse_pte_range(vma, pmd, addr, next, entry, page);
563 return 1; 568 if (ret)
569 return ret;
564 } while (pmd++, addr = next, addr != end); 570 } while (pmd++, addr = next, addr != end);
565 return 0; 571 return 0;
566} 572}
@@ -571,14 +577,16 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
571{ 577{
572 pud_t *pud; 578 pud_t *pud;
573 unsigned long next; 579 unsigned long next;
580 int ret;
574 581
575 pud = pud_offset(pgd, addr); 582 pud = pud_offset(pgd, addr);
576 do { 583 do {
577 next = pud_addr_end(addr, end); 584 next = pud_addr_end(addr, end);
578 if (pud_none_or_clear_bad(pud)) 585 if (pud_none_or_clear_bad(pud))
579 continue; 586 continue;
580 if (unuse_pmd_range(vma, pud, addr, next, entry, page)) 587 ret = unuse_pmd_range(vma, pud, addr, next, entry, page);
581 return 1; 588 if (ret)
589 return ret;
582 } while (pud++, addr = next, addr != end); 590 } while (pud++, addr = next, addr != end);
583 return 0; 591 return 0;
584} 592}
@@ -588,6 +596,7 @@ static int unuse_vma(struct vm_area_struct *vma,
588{ 596{
589 pgd_t *pgd; 597 pgd_t *pgd;
590 unsigned long addr, end, next; 598 unsigned long addr, end, next;
599 int ret;
591 600
592 if (page->mapping) { 601 if (page->mapping) {
593 addr = page_address_in_vma(page, vma); 602 addr = page_address_in_vma(page, vma);
@@ -605,8 +614,9 @@ static int unuse_vma(struct vm_area_struct *vma,
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
 			continue;
-		if (unuse_pud_range(vma, pgd, addr, next, entry, page))
-			return 1;
+		ret = unuse_pud_range(vma, pgd, addr, next, entry, page);
+		if (ret)
+			return ret;
 	} while (pgd++, addr = next, addr != end);
 	return 0;
 }
@@ -615,6 +625,7 @@ static int unuse_mm(struct mm_struct *mm,
 		swp_entry_t entry, struct page *page)
 {
 	struct vm_area_struct *vma;
+	int ret = 0;
 
 	if (!down_read_trylock(&mm->mmap_sem)) {
 		/*
@@ -627,15 +638,11 @@ static int unuse_mm(struct mm_struct *mm,
 		lock_page(page);
 	}
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (vma->anon_vma && unuse_vma(vma, entry, page))
+		if (vma->anon_vma && (ret = unuse_vma(vma, entry, page)))
 			break;
 	}
 	up_read(&mm->mmap_sem);
-	/*
-	 * Currently unuse_mm cannot fail, but leave error handling
-	 * at call sites for now, since we change it from time to time.
-	 */
-	return 0;
+	return (ret < 0)? ret: 0;
 }
 
 /*
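Taken together, the hunks above change the return convention of the swapoff page-table walk: unuse_pte() now returns a negative errno when the memory cgroup charge fails, 1 when the swap entry was found and reinstated, and 0 when nothing matched; each level of the walk propagates any non-zero result instead of a boolean "found" flag, and unuse_mm() folds the positive "found" result back into 0 so only real errors reach its caller.  A self-contained sketch of that pattern, using hypothetical function names that are not part of the kernel:

#include <errno.h>
#include <stdio.h>

/* Hypothetical leaf step: <0 means error, 1 means found and handled, 0 means keep looking. */
static int try_one_slot(int slot, int wanted)
{
	if (slot == 13)
		return -ENOMEM;		/* models a charge failure at this slot */
	return slot == wanted ? 1 : 0;
}

/* Intermediate level: propagate the first non-zero result, found or error alike. */
static int walk_range(int start, int end, int wanted)
{
	int ret;

	for (int slot = start; slot < end; slot++) {
		ret = try_one_slot(slot, wanted);
		if (ret)
			return ret;
	}
	return 0;
}

/* Top level: a positive "found" is success; only negative errors reach the caller. */
static int walk_all(int wanted)
{
	int ret = walk_range(0, 32, wanted);

	return ret < 0 ? ret : 0;
}

int main(void)
{
	printf("found before error: %d\n", walk_all(5));	/* prints 0 */
	printf("error hit first:    %d\n", walk_all(20));	/* prints -ENOMEM */
	return 0;
}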