author		KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2009-01-07 21:07:56 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-01-08 11:31:05 -0500
commit		d13d144309d2e5a3e6ad978b16c1d0226ddc9231 (patch)
tree		37c19902b527823956db969d9428737081b2a94d
parent		c1e862c1f5ad34771b6d0a528cf681e0dcad7c86 (diff)
memcg: handle swap caches
SwapCache support for memory resource controller (memcg)
Before the mem+swap controller can be introduced, memcg itself must handle
SwapCache properly. This patch is cut out from that work.

In the current memcg, SwapCache is simply leaked, so a user can create tons
of unaccounted SwapCache. This accounting leak should be fixed.

SwapCache accounting is done as follows (see the sketch after the rules):
charge (anon)
- charged when it's mapped.
  (Because of readahead, charging at add_to_swap_cache() is not sane.)
uncharge (anon)
- uncharged when it's dropped from swapcache and fully unmapped,
  i.e. it is not uncharged at unmap time.
  Note: deletion from swap cache at swap-in is done after rmap information
  is established.
charge (shmem)
- charged at swap-in. This avoids charging at add_to_page_cache().
uncharge (shmem)
- uncharged when it's dropped from swapcache and is no longer on shmem's
  radix-tree.

At migration, the check against the 'old page' is modified to handle shmem.
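
As a reading aid only, here is a minimal userspace C sketch of the uncharge
decision these rules imply. struct fake_page and its fields are simplified
stand-ins for PageAnon(), page_mapped() and shmem radix-tree membership, not
kernel API:

/* Minimal userspace sketch of the SwapCache uncharge rules above. */
#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	bool anon;		/* stand-in for PageAnon(page) */
	bool mapped;		/* stand-in for page_mapped(page) */
	bool on_shmem_tree;	/* stand-in for "still on shmem's radix-tree" */
};

/* Should the charge be dropped when the page leaves the swap cache? */
static bool uncharge_on_swapcache_delete(const struct fake_page *p)
{
	if (p->anon)
		return !p->mapped;	/* anon: only when fully unmapped */
	return !p->on_shmem_tree;	/* shmem: only when off the radix-tree */
}

int main(void)
{
	struct fake_page mapped_anon = { .anon = true, .mapped = true };
	struct fake_page detached_shmem = { .anon = false, .on_shmem_tree = false };

	printf("mapped anon page:    uncharge=%d\n",
	       uncharge_on_swapcache_delete(&mapped_anon));	/* 0: keep charge */
	printf("detached shmem page: uncharge=%d\n",
	       uncharge_on_swapcache_delete(&detached_shmem));	/* 1: drop charge */
	return 0;
}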
Compared to the old version discussed earlier (which caused trouble), this
one has the advantages of:
- the PCG_USED bit.
- simple migration handling.

So the situation is much easier than it was several months ago.
[hugh@veritas.com: memcg: handle swap caches build fix]
Reviewed-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Tested-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  Documentation/controllers/memory.txt |  5
-rw-r--r--  include/linux/swap.h                 | 22
-rw-r--r--  mm/memcontrol.c                      | 67
-rw-r--r--  mm/shmem.c                           | 18
-rw-r--r--  mm/swap_state.c                      |  1
5 files changed, 105 insertions, 8 deletions
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 54253b7a8db2..9fe2d0eabe05 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -137,6 +137,11 @@ behind this approach is that a cgroup that aggressively uses a shared
 page will eventually get charged for it (once it is uncharged from
 the cgroup that brought it in -- this will happen on memory pressure).
 
+Exception: When you do swapoff and make swapped-out pages of shmem(tmpfs) to
+be backed into memory in force, charges for pages are accounted against the
+caller of swapoff rather than the users of shmem.
+
+
 2.4 Reclaim
 
 Each cgroup maintains a per cgroup LRU that consists of an active
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 91dee50fe260..f8f3907533f0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -333,6 +333,22 @@ static inline void disable_swap_token(void)
 	put_swap_token(swap_token_mm);
 }
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+extern int mem_cgroup_cache_charge_swapin(struct page *page,
+			struct mm_struct *mm, gfp_t mask, bool locked);
+extern void mem_cgroup_uncharge_swapcache(struct page *page);
+#else
+static inline
+int mem_cgroup_cache_charge_swapin(struct page *page,
+			struct mm_struct *mm, gfp_t mask, bool locked)
+{
+	return 0;
+}
+static inline void mem_cgroup_uncharge_swapcache(struct page *page)
+{
+}
+#endif
+
 #else /* CONFIG_SWAP */
 
 #define nr_swap_pages				0L
@@ -409,6 +425,12 @@ static inline swp_entry_t get_swap_page(void)
 #define has_swap_token(x) 0
 #define disable_swap_token() do { } while(0)
 
+static inline int mem_cgroup_cache_charge_swapin(struct page *page,
+			struct mm_struct *mm, gfp_t mask, bool locked)
+{
+	return 0;
+}
+
 #endif /* CONFIG_SWAP */
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
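
The #else stubs above follow the usual kernel pattern: when the feature is
compiled out, static inline no-ops with identical signatures keep every call
site free of #ifdef clutter. A userspace illustration of the same pattern,
for orientation only — FEATURE_MEMCG and feature_charge() are made-up names,
not kernel symbols (with FEATURE_MEMCG defined you would have to link a real
implementation):

#include <stdio.h>

#ifdef FEATURE_MEMCG
extern int feature_charge(int page_id);
#else
static inline int feature_charge(int page_id)
{
	(void)page_id;
	return 0;	/* "success": nothing to account when compiled out */
}
#endif

int main(void)
{
	/* Compiles unchanged whether or not FEATURE_MEMCG is defined. */
	if (feature_charge(42))
		fprintf(stderr, "charge failed\n");
	else
		printf("charged (or no-op)\n");
	return 0;
}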
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index decace3bb57e..7288e9d85ca7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -21,6 +21,7 @@
 #include <linux/memcontrol.h>
 #include <linux/cgroup.h>
 #include <linux/mm.h>
+#include <linux/pagemap.h>
 #include <linux/smp.h>
 #include <linux/page-flags.h>
 #include <linux/backing-dev.h>
@@ -139,6 +140,7 @@ enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
 	MEM_CGROUP_CHARGE_TYPE_SHMEM,	/* used by page migration of shmem */
 	MEM_CGROUP_CHARGE_TYPE_FORCE,	/* used by force_empty */
+	MEM_CGROUP_CHARGE_TYPE_SWAPOUT,	/* for accounting swapcache */
 	NR_CHARGE_TYPE,
 };
 
@@ -780,6 +782,33 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 					MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
 }
 
+#ifdef CONFIG_SWAP
+int mem_cgroup_cache_charge_swapin(struct page *page,
+			struct mm_struct *mm, gfp_t mask, bool locked)
+{
+	int ret = 0;
+
+	if (mem_cgroup_subsys.disabled)
+		return 0;
+	if (unlikely(!mm))
+		mm = &init_mm;
+	if (!locked)
+		lock_page(page);
+	/*
+	 * If not locked, the page can be dropped from SwapCache until
+	 * we reach here.
+	 */
+	if (PageSwapCache(page)) {
+		ret = mem_cgroup_charge_common(page, mm, mask,
+				MEM_CGROUP_CHARGE_TYPE_SHMEM, NULL);
+	}
+	if (!locked)
+		unlock_page(page);
+
+	return ret;
+}
+#endif
+
 void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
 {
 	struct page_cgroup *pc;
@@ -817,6 +846,9 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	if (mem_cgroup_subsys.disabled)
 		return;
 
+	if (PageSwapCache(page))
+		return;
+
 	/*
 	 * Check if our page_cgroup is valid
 	 */
@@ -825,12 +857,26 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 		return;
 
 	lock_page_cgroup(pc);
-	if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED && page_mapped(page))
-	    || !PageCgroupUsed(pc)) {
-		/* This happens at race in zap_pte_range() and do_swap_page()*/
-		unlock_page_cgroup(pc);
-		return;
+
+	if (!PageCgroupUsed(pc))
+		goto unlock_out;
+
+	switch (ctype) {
+	case MEM_CGROUP_CHARGE_TYPE_MAPPED:
+		if (page_mapped(page))
+			goto unlock_out;
+		break;
+	case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
+		if (!PageAnon(page)) {	/* Shared memory */
+			if (page->mapping && !page_is_file_cache(page))
+				goto unlock_out;
+		} else if (page_mapped(page)) /* Anon */
+			goto unlock_out;
+		break;
+	default:
+		break;
 	}
+
 	ClearPageCgroupUsed(pc);
 	mem = pc->mem_cgroup;
 
@@ -844,6 +890,10 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	css_put(&mem->css);
 
 	return;
+
+unlock_out:
+	unlock_page_cgroup(pc);
+	return;
 }
 
 void mem_cgroup_uncharge_page(struct page *page)
@@ -863,6 +913,11 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
 	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE);
 }
 
+void mem_cgroup_uncharge_swapcache(struct page *page)
+{
+	__mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
+}
+
 /*
  * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
  * page belongs to.
@@ -920,7 +975,7 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
 		ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM;
 
 	/* unused page is not on radix-tree now. */
-	if (unused && ctype != MEM_CGROUP_CHARGE_TYPE_MAPPED)
+	if (unused)
 		__mem_cgroup_uncharge_common(unused, ctype);
 
 	pc = lookup_page_cgroup(target);
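
For orientation, the two call patterns behind the new "locked" parameter,
condensed from the mm/shmem.c changes below (fragments, not standalone code):

	/*
	 * swapoff path (shmem_unuse_inode): the caller already holds the
	 * page lock, so pass locked == true.
	 */
	error = mem_cgroup_cache_charge_swapin(page, current->mm,
					GFP_HIGHUSER_MOVABLE, true);

	/*
	 * shmem_getpage() swap-in path: the page is unlocked here, so pass
	 * locked == false; the helper takes the lock itself and re-checks
	 * PageSwapCache() to close the race with removal from swap cache.
	 */
	if (mem_cgroup_cache_charge_swapin(swappage, current->mm, gfp, false))
		error = -ENOMEM;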
diff --git a/mm/shmem.c b/mm/shmem.c
index bd9b4ea307b2..adf5c3eedbc9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -928,8 +928,12 @@ found:
 	error = 1;
 	if (!inode)
 		goto out;
-	/* Charge page using GFP_HIGHUSER_MOVABLE while we can wait */
-	error = mem_cgroup_cache_charge(page, current->mm, GFP_HIGHUSER_MOVABLE);
+	/*
+	 * Charge page using GFP_HIGHUSER_MOVABLE while we can wait.
+	 * charged back to the user(not to caller) when swap account is used.
+	 */
+	error = mem_cgroup_cache_charge_swapin(page,
+			current->mm, GFP_HIGHUSER_MOVABLE, true);
 	if (error)
 		goto out;
 	error = radix_tree_preload(GFP_KERNEL);
@@ -1266,6 +1270,16 @@ repeat:
 				goto repeat;
 			}
 			wait_on_page_locked(swappage);
+			/*
+			 * We want to avoid charge at add_to_page_cache().
+			 * charge against this swap cache here.
+			 */
+			if (mem_cgroup_cache_charge_swapin(swappage,
+						current->mm, gfp, false)) {
+				page_cache_release(swappage);
+				error = -ENOMEM;
+				goto failed;
+			}
 			page_cache_release(swappage);
 			goto repeat;
 		}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 81c825f67a7f..09291ca11f5f 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -118,6 +118,7 @@ void __delete_from_swap_cache(struct page *page)
 	total_swapcache_pages--;
 	__dec_zone_page_state(page, NR_FILE_PAGES);
 	INC_CACHE_INFO(del_total);
+	mem_cgroup_uncharge_swapcache(page);
 }
 
 /**