diff options
author | Balbir Singh <balbir@linux.vnet.ibm.com> | 2008-02-07 03:14:02 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-07 11:42:19 -0500 |
commit | e1a1cd590e3fcb0d2e230128daf2337ea55387dc (patch) | |
tree | eb660ab340c657a1eb595b2d4d8e8b62783bf6fb | |
parent | bed7161a519a2faef53e1bce1b47595e297c1d14 (diff) |
Memory controller: make charging gfp mask aware
Nick Piggin pointed out that the swap cache and page cache addition routines
could be called from non-GFP_KERNEL contexts. This patch makes the
charging routine aware of the gfp context. Charging might fail if the
cgroup is over its limit, in which case a suitable error is returned.
This patch was tested on a PowerPC box. I am still looking at being able
to test the path through which allocations happen in non-GFP_KERNEL
contexts.
[kamezawa.hiroyu@jp.fujitsu.com: problem with ZONE_MOVABLE]
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/memcontrol.h | 12 | ||||
-rw-r--r-- | include/linux/swap.h | 3 | ||||
-rw-r--r-- | mm/filemap.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 24 | ||||
-rw-r--r-- | mm/memory.c | 10 | ||||
-rw-r--r-- | mm/migrate.c | 2 | ||||
-rw-r--r-- | mm/swap_state.c | 2 | ||||
-rw-r--r-- | mm/swapfile.c | 2 | ||||
-rw-r--r-- | mm/vmscan.c | 14 |
9 files changed, 41 insertions, 30 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9d0a830423b6..cc0ad7191acd 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -32,7 +32,8 @@ extern void mm_free_cgroup(struct mm_struct *mm); | |||
32 | extern void page_assign_page_cgroup(struct page *page, | 32 | extern void page_assign_page_cgroup(struct page *page, |
33 | struct page_cgroup *pc); | 33 | struct page_cgroup *pc); |
34 | extern struct page_cgroup *page_get_page_cgroup(struct page *page); | 34 | extern struct page_cgroup *page_get_page_cgroup(struct page *page); |
35 | extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm); | 35 | extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, |
36 | gfp_t gfp_mask); | ||
36 | extern void mem_cgroup_uncharge(struct page_cgroup *pc); | 37 | extern void mem_cgroup_uncharge(struct page_cgroup *pc); |
37 | extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active); | 38 | extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active); |
38 | extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | 39 | extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, |
@@ -42,7 +43,8 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
42 | struct mem_cgroup *mem_cont, | 43 | struct mem_cgroup *mem_cont, |
43 | int active); | 44 | int active); |
44 | extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); | 45 | extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); |
45 | extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm); | 46 | extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
47 | gfp_t gfp_mask); | ||
46 | extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm); | 48 | extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm); |
47 | 49 | ||
48 | static inline void mem_cgroup_uncharge_page(struct page *page) | 50 | static inline void mem_cgroup_uncharge_page(struct page *page) |
@@ -70,7 +72,8 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page) | |||
70 | return NULL; | 72 | return NULL; |
71 | } | 73 | } |
72 | 74 | ||
73 | static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm) | 75 | static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm, |
76 | gfp_t gfp_mask) | ||
74 | { | 77 | { |
75 | return 0; | 78 | return 0; |
76 | } | 79 | } |
@@ -89,7 +92,8 @@ static inline void mem_cgroup_move_lists(struct page_cgroup *pc, | |||
89 | } | 92 | } |
90 | 93 | ||
91 | static inline int mem_cgroup_cache_charge(struct page *page, | 94 | static inline int mem_cgroup_cache_charge(struct page *page, |
92 | struct mm_struct *mm) | 95 | struct mm_struct *mm, |
96 | gfp_t gfp_mask) | ||
93 | { | 97 | { |
94 | return 0; | 98 | return 0; |
95 | } | 99 | } |
diff --git a/include/linux/swap.h b/include/linux/swap.h index 4d91bc0e0fd5..3ca5c4bd6d3f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h | |||
@@ -183,7 +183,8 @@ extern void swap_setup(void); | |||
183 | /* linux/mm/vmscan.c */ | 183 | /* linux/mm/vmscan.c */ |
184 | extern unsigned long try_to_free_pages(struct zone **zones, int order, | 184 | extern unsigned long try_to_free_pages(struct zone **zones, int order, |
185 | gfp_t gfp_mask); | 185 | gfp_t gfp_mask); |
186 | extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem); | 186 | extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem, |
187 | gfp_t gfp_mask); | ||
187 | extern int __isolate_lru_page(struct page *page, int mode); | 188 | extern int __isolate_lru_page(struct page *page, int mode); |
188 | extern unsigned long shrink_all_memory(unsigned long nr_pages); | 189 | extern unsigned long shrink_all_memory(unsigned long nr_pages); |
189 | extern int vm_swappiness; | 190 | extern int vm_swappiness; |
diff --git a/mm/filemap.c b/mm/filemap.c index 8ae171cc2811..63040d5e0ae2 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -464,7 +464,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping, | |||
464 | 464 | ||
465 | if (error == 0) { | 465 | if (error == 0) { |
466 | 466 | ||
467 | error = mem_cgroup_cache_charge(page, current->mm); | 467 | error = mem_cgroup_cache_charge(page, current->mm, gfp_mask); |
468 | if (error) | 468 | if (error) |
469 | goto out; | 469 | goto out; |
470 | 470 | ||
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ff7cac602984..ac8774426fec 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -261,7 +261,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
261 | * 0 if the charge was successful | 261 | * 0 if the charge was successful |
262 | * < 0 if the cgroup is over its limit | 262 | * < 0 if the cgroup is over its limit |
263 | */ | 263 | */ |
264 | int mem_cgroup_charge(struct page *page, struct mm_struct *mm) | 264 | int mem_cgroup_charge(struct page *page, struct mm_struct *mm, |
265 | gfp_t gfp_mask) | ||
265 | { | 266 | { |
266 | struct mem_cgroup *mem; | 267 | struct mem_cgroup *mem; |
267 | struct page_cgroup *pc, *race_pc; | 268 | struct page_cgroup *pc, *race_pc; |
@@ -293,7 +294,7 @@ retry: | |||
293 | 294 | ||
294 | unlock_page_cgroup(page); | 295 | unlock_page_cgroup(page); |
295 | 296 | ||
296 | pc = kzalloc(sizeof(struct page_cgroup), GFP_KERNEL); | 297 | pc = kzalloc(sizeof(struct page_cgroup), gfp_mask); |
297 | if (pc == NULL) | 298 | if (pc == NULL) |
298 | goto err; | 299 | goto err; |
299 | 300 | ||
@@ -320,7 +321,14 @@ retry: | |||
320 | * the cgroup limit. | 321 | * the cgroup limit. |
321 | */ | 322 | */ |
322 | while (res_counter_charge(&mem->res, PAGE_SIZE)) { | 323 | while (res_counter_charge(&mem->res, PAGE_SIZE)) { |
323 | if (try_to_free_mem_cgroup_pages(mem)) | 324 | bool is_atomic = gfp_mask & GFP_ATOMIC; |
325 | /* | ||
326 | * We cannot reclaim under GFP_ATOMIC, fail the charge | ||
327 | */ | ||
328 | if (is_atomic) | ||
329 | goto noreclaim; | ||
330 | |||
331 | if (try_to_free_mem_cgroup_pages(mem, gfp_mask)) | ||
324 | continue; | 332 | continue; |
325 | 333 | ||
326 | /* | 334 | /* |
@@ -344,9 +352,10 @@ retry: | |||
344 | congestion_wait(WRITE, HZ/10); | 352 | congestion_wait(WRITE, HZ/10); |
345 | continue; | 353 | continue; |
346 | } | 354 | } |
347 | 355 | noreclaim: | |
348 | css_put(&mem->css); | 356 | css_put(&mem->css); |
349 | mem_cgroup_out_of_memory(mem, GFP_KERNEL); | 357 | if (!is_atomic) |
358 | mem_cgroup_out_of_memory(mem, GFP_KERNEL); | ||
350 | goto free_pc; | 359 | goto free_pc; |
351 | } | 360 | } |
352 | 361 | ||
@@ -385,7 +394,8 @@ err: | |||
385 | /* | 394 | /* |
386 | * See if the cached pages should be charged at all? | 395 | * See if the cached pages should be charged at all? |
387 | */ | 396 | */ |
388 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm) | 397 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
398 | gfp_t gfp_mask) | ||
389 | { | 399 | { |
390 | struct mem_cgroup *mem; | 400 | struct mem_cgroup *mem; |
391 | if (!mm) | 401 | if (!mm) |
@@ -393,7 +403,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm) | |||
393 | 403 | ||
394 | mem = rcu_dereference(mm->mem_cgroup); | 404 | mem = rcu_dereference(mm->mem_cgroup); |
395 | if (mem->control_type == MEM_CGROUP_TYPE_ALL) | 405 | if (mem->control_type == MEM_CGROUP_TYPE_ALL) |
396 | return mem_cgroup_charge(page, mm); | 406 | return mem_cgroup_charge(page, mm, gfp_mask); |
397 | else | 407 | else |
398 | return 0; | 408 | return 0; |
399 | } | 409 | } |
diff --git a/mm/memory.c b/mm/memory.c index 0ba224ea6ba4..153a54b2013c 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -1147,7 +1147,7 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa | |||
1147 | pte_t *pte; | 1147 | pte_t *pte; |
1148 | spinlock_t *ptl; | 1148 | spinlock_t *ptl; |
1149 | 1149 | ||
1150 | retval = mem_cgroup_charge(page, mm); | 1150 | retval = mem_cgroup_charge(page, mm, GFP_KERNEL); |
1151 | if (retval) | 1151 | if (retval) |
1152 | goto out; | 1152 | goto out; |
1153 | 1153 | ||
@@ -1650,7 +1650,7 @@ gotten: | |||
1650 | cow_user_page(new_page, old_page, address, vma); | 1650 | cow_user_page(new_page, old_page, address, vma); |
1651 | __SetPageUptodate(new_page); | 1651 | __SetPageUptodate(new_page); |
1652 | 1652 | ||
1653 | if (mem_cgroup_charge(new_page, mm)) | 1653 | if (mem_cgroup_charge(new_page, mm, GFP_KERNEL)) |
1654 | goto oom_free_new; | 1654 | goto oom_free_new; |
1655 | 1655 | ||
1656 | /* | 1656 | /* |
@@ -2052,7 +2052,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2052 | count_vm_event(PGMAJFAULT); | 2052 | count_vm_event(PGMAJFAULT); |
2053 | } | 2053 | } |
2054 | 2054 | ||
2055 | if (mem_cgroup_charge(page, mm)) { | 2055 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { |
2056 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); | 2056 | delayacct_clear_flag(DELAYACCT_PF_SWAPIN); |
2057 | ret = VM_FAULT_OOM; | 2057 | ret = VM_FAULT_OOM; |
2058 | goto out; | 2058 | goto out; |
@@ -2139,7 +2139,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2139 | goto oom; | 2139 | goto oom; |
2140 | __SetPageUptodate(page); | 2140 | __SetPageUptodate(page); |
2141 | 2141 | ||
2142 | if (mem_cgroup_charge(page, mm)) | 2142 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) |
2143 | goto oom_free_page; | 2143 | goto oom_free_page; |
2144 | 2144 | ||
2145 | entry = mk_pte(page, vma->vm_page_prot); | 2145 | entry = mk_pte(page, vma->vm_page_prot); |
@@ -2277,7 +2277,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2277 | 2277 | ||
2278 | } | 2278 | } |
2279 | 2279 | ||
2280 | if (mem_cgroup_charge(page, mm)) { | 2280 | if (mem_cgroup_charge(page, mm, GFP_KERNEL)) { |
2281 | ret = VM_FAULT_OOM; | 2281 | ret = VM_FAULT_OOM; |
2282 | goto out; | 2282 | goto out; |
2283 | } | 2283 | } |
diff --git a/mm/migrate.c b/mm/migrate.c index 417bbda14e5b..763794144697 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -153,7 +153,7 @@ static void remove_migration_pte(struct vm_area_struct *vma, | |||
153 | return; | 153 | return; |
154 | } | 154 | } |
155 | 155 | ||
156 | if (mem_cgroup_charge(new, mm)) { | 156 | if (mem_cgroup_charge(new, mm, GFP_KERNEL)) { |
157 | pte_unmap(ptep); | 157 | pte_unmap(ptep); |
158 | return; | 158 | return; |
159 | } | 159 | } |
diff --git a/mm/swap_state.c b/mm/swap_state.c index 88258869c8e7..581b609e748d 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c | |||
@@ -78,7 +78,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) | |||
78 | error = radix_tree_preload(gfp_mask); | 78 | error = radix_tree_preload(gfp_mask); |
79 | if (!error) { | 79 | if (!error) { |
80 | 80 | ||
81 | error = mem_cgroup_cache_charge(page, current->mm); | 81 | error = mem_cgroup_cache_charge(page, current->mm, gfp_mask); |
82 | if (error) | 82 | if (error) |
83 | goto out; | 83 | goto out; |
84 | 84 | ||
diff --git a/mm/swapfile.c b/mm/swapfile.c index fddc4cc4149b..35e00c3d0286 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -510,7 +510,7 @@ unsigned int count_swap_pages(int type, int free) | |||
510 | static int unuse_pte(struct vm_area_struct *vma, pte_t *pte, | 510 | static int unuse_pte(struct vm_area_struct *vma, pte_t *pte, |
511 | unsigned long addr, swp_entry_t entry, struct page *page) | 511 | unsigned long addr, swp_entry_t entry, struct page *page) |
512 | { | 512 | { |
513 | if (mem_cgroup_charge(page, vma->vm_mm)) | 513 | if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) |
514 | return -ENOMEM; | 514 | return -ENOMEM; |
515 | 515 | ||
516 | inc_mm_counter(vma->vm_mm, anon_rss); | 516 | inc_mm_counter(vma->vm_mm, anon_rss); |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 215f6a726b2f..b7d868cbca09 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1337,16 +1337,11 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask) | |||
1337 | 1337 | ||
1338 | #ifdef CONFIG_CGROUP_MEM_CONT | 1338 | #ifdef CONFIG_CGROUP_MEM_CONT |
1339 | 1339 | ||
1340 | #ifdef CONFIG_HIGHMEM | 1340 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont, |
1341 | #define ZONE_USERPAGES ZONE_HIGHMEM | 1341 | gfp_t gfp_mask) |
1342 | #else | ||
1343 | #define ZONE_USERPAGES ZONE_NORMAL | ||
1344 | #endif | ||
1345 | |||
1346 | unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont) | ||
1347 | { | 1342 | { |
1348 | struct scan_control sc = { | 1343 | struct scan_control sc = { |
1349 | .gfp_mask = GFP_KERNEL, | 1344 | .gfp_mask = gfp_mask, |
1350 | .may_writepage = !laptop_mode, | 1345 | .may_writepage = !laptop_mode, |
1351 | .may_swap = 1, | 1346 | .may_swap = 1, |
1352 | .swap_cluster_max = SWAP_CLUSTER_MAX, | 1347 | .swap_cluster_max = SWAP_CLUSTER_MAX, |
@@ -1357,9 +1352,10 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont) | |||
1357 | }; | 1352 | }; |
1358 | int node; | 1353 | int node; |
1359 | struct zone **zones; | 1354 | struct zone **zones; |
1355 | int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE); | ||
1360 | 1356 | ||
1361 | for_each_online_node(node) { | 1357 | for_each_online_node(node) { |
1362 | zones = NODE_DATA(node)->node_zonelists[ZONE_USERPAGES].zones; | 1358 | zones = NODE_DATA(node)->node_zonelists[target_zone].zones; |
1363 | if (do_try_to_free_pages(zones, sc.gfp_mask, &sc)) | 1359 | if (do_try_to_free_pages(zones, sc.gfp_mask, &sc)) |
1364 | return 1; | 1360 | return 1; |
1365 | } | 1361 | } |