aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Balbir Singh <balbir@linux.vnet.ibm.com> 2008-02-07 03:14:02 -0500
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-02-07 11:42:19 -0500
commit e1a1cd590e3fcb0d2e230128daf2337ea55387dc (patch)
tree eb660ab340c657a1eb595b2d4d8e8b62783bf6fb
parent bed7161a519a2faef53e1bce1b47595e297c1d14 (diff)
Memory controller: make charging gfp mask aware
Nick Piggin pointed out that swap cache and page cache addition routines could be called from non-GFP_KERNEL contexts. This patch makes the charging routine aware of the gfp context. Charging might fail if the cgroup is over its limit, in which case a suitable error is returned. This patch was tested on a Powerpc box. I am still looking at being able to test the path through which allocations happen in non-GFP_KERNEL contexts. [kamezawa.hiroyu@jp.fujitsu.com: problem with ZONE_MOVABLE] Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Pavel Emelianov <xemul@openvz.org> Cc: Paul Menage <menage@google.com> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Kirill Korotaev <dev@sw.ru> Cc: Herbert Poetzl <herbert@13thfloor.at> Cc: David Rientjes <rientjes@google.com> Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- include/linux/memcontrol.h 12
-rw-r--r-- include/linux/swap.h 3
-rw-r--r-- mm/filemap.c 2
-rw-r--r-- mm/memcontrol.c 24
-rw-r--r-- mm/memory.c 10
-rw-r--r-- mm/migrate.c 2
-rw-r--r-- mm/swap_state.c 2
-rw-r--r-- mm/swapfile.c 2
-rw-r--r-- mm/vmscan.c 14
9 files changed, 41 insertions, 30 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9d0a830423b..cc0ad7191ac 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -32,7 +32,8 @@ extern void mm_free_cgroup(struct mm_struct *mm);
32extern void page_assign_page_cgroup(struct page *page, 32extern void page_assign_page_cgroup(struct page *page,
33 struct page_cgroup *pc); 33 struct page_cgroup *pc);
34extern struct page_cgroup *page_get_page_cgroup(struct page *page); 34extern struct page_cgroup *page_get_page_cgroup(struct page *page);
35extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm); 35extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
36 gfp_t gfp_mask);
36extern void mem_cgroup_uncharge(struct page_cgroup *pc); 37extern void mem_cgroup_uncharge(struct page_cgroup *pc);
37extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active); 38extern void mem_cgroup_move_lists(struct page_cgroup *pc, bool active);
38extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, 39extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
@@ -42,7 +43,8 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
42 struct mem_cgroup *mem_cont, 43 struct mem_cgroup *mem_cont,
43 int active); 44 int active);
44extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask); 45extern void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask);
45extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm); 46extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
47 gfp_t gfp_mask);
46extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm); 48extern struct mem_cgroup *mm_cgroup(struct mm_struct *mm);
47 49
48static inline void mem_cgroup_uncharge_page(struct page *page) 50static inline void mem_cgroup_uncharge_page(struct page *page)
@@ -70,7 +72,8 @@ static inline struct page_cgroup *page_get_page_cgroup(struct page *page)
70 return NULL; 72 return NULL;
71} 73}
72 74
73static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm) 75static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
76 gfp_t gfp_mask)
74{ 77{
75 return 0; 78 return 0;
76} 79}
@@ -89,7 +92,8 @@ static inline void mem_cgroup_move_lists(struct page_cgroup *pc,
89} 92}
90 93
91static inline int mem_cgroup_cache_charge(struct page *page, 94static inline int mem_cgroup_cache_charge(struct page *page,
92 struct mm_struct *mm) 95 struct mm_struct *mm,
96 gfp_t gfp_mask)
93{ 97{
94 return 0; 98 return 0;
95} 99}
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4d91bc0e0fd..3ca5c4bd6d3 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -183,7 +183,8 @@ extern void swap_setup(void);
183/* linux/mm/vmscan.c */ 183/* linux/mm/vmscan.c */
184extern unsigned long try_to_free_pages(struct zone **zones, int order, 184extern unsigned long try_to_free_pages(struct zone **zones, int order,
185 gfp_t gfp_mask); 185 gfp_t gfp_mask);
186extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem); 186extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
187 gfp_t gfp_mask);
187extern int __isolate_lru_page(struct page *page, int mode); 188extern int __isolate_lru_page(struct page *page, int mode);
188extern unsigned long shrink_all_memory(unsigned long nr_pages); 189extern unsigned long shrink_all_memory(unsigned long nr_pages);
189extern int vm_swappiness; 190extern int vm_swappiness;
diff --git a/mm/filemap.c b/mm/filemap.c
index 8ae171cc281..63040d5e0ae 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -464,7 +464,7 @@ int add_to_page_cache(struct page *page, struct address_space *mapping,
464 464
465 if (error == 0) { 465 if (error == 0) {
466 466
467 error = mem_cgroup_cache_charge(page, current->mm); 467 error = mem_cgroup_cache_charge(page, current->mm, gfp_mask);
468 if (error) 468 if (error)
469 goto out; 469 goto out;
470 470
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ff7cac60298..ac8774426fe 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -261,7 +261,8 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
261 * 0 if the charge was successful 261 * 0 if the charge was successful
262 * < 0 if the cgroup is over its limit 262 * < 0 if the cgroup is over its limit
263 */ 263 */
264int mem_cgroup_charge(struct page *page, struct mm_struct *mm) 264int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
265 gfp_t gfp_mask)
265{ 266{
266 struct mem_cgroup *mem; 267 struct mem_cgroup *mem;
267 struct page_cgroup *pc, *race_pc; 268 struct page_cgroup *pc, *race_pc;
@@ -293,7 +294,7 @@ retry:
293 294
294 unlock_page_cgroup(page); 295 unlock_page_cgroup(page);
295 296
296 pc = kzalloc(sizeof(struct page_cgroup), GFP_KERNEL); 297 pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
297 if (pc == NULL) 298 if (pc == NULL)
298 goto err; 299 goto err;
299 300
@@ -320,7 +321,14 @@ retry:
320 * the cgroup limit. 321 * the cgroup limit.
321 */ 322 */
322 while (res_counter_charge(&mem->res, PAGE_SIZE)) { 323 while (res_counter_charge(&mem->res, PAGE_SIZE)) {
323 if (try_to_free_mem_cgroup_pages(mem)) 324 bool is_atomic = gfp_mask & GFP_ATOMIC;
325 /*
326 * We cannot reclaim under GFP_ATOMIC, fail the charge
327 */
328 if (is_atomic)
329 goto noreclaim;
330
331 if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
324 continue; 332 continue;
325 333
326 /* 334 /*
@@ -344,9 +352,10 @@ retry:
344 congestion_wait(WRITE, HZ/10); 352 congestion_wait(WRITE, HZ/10);
345 continue; 353 continue;
346 } 354 }
347 355noreclaim:
348 css_put(&mem->css); 356 css_put(&mem->css);
349 mem_cgroup_out_of_memory(mem, GFP_KERNEL); 357 if (!is_atomic)
358 mem_cgroup_out_of_memory(mem, GFP_KERNEL);
350 goto free_pc; 359 goto free_pc;
351 } 360 }
352 361
@@ -385,7 +394,8 @@ err:
385/* 394/*
386 * See if the cached pages should be charged at all? 395 * See if the cached pages should be charged at all?
387 */ 396 */
388int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm) 397int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
398 gfp_t gfp_mask)
389{ 399{
390 struct mem_cgroup *mem; 400 struct mem_cgroup *mem;
391 if (!mm) 401 if (!mm)
@@ -393,7 +403,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm)
393 403
394 mem = rcu_dereference(mm->mem_cgroup); 404 mem = rcu_dereference(mm->mem_cgroup);
395 if (mem->control_type == MEM_CGROUP_TYPE_ALL) 405 if (mem->control_type == MEM_CGROUP_TYPE_ALL)
396 return mem_cgroup_charge(page, mm); 406 return mem_cgroup_charge(page, mm, gfp_mask);
397 else 407 else
398 return 0; 408 return 0;
399} 409}
diff --git a/mm/memory.c b/mm/memory.c
index 0ba224ea6ba..153a54b2013 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1147,7 +1147,7 @@ static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *pa
1147 pte_t *pte; 1147 pte_t *pte;
1148 spinlock_t *ptl; 1148 spinlock_t *ptl;
1149 1149
1150 retval = mem_cgroup_charge(page, mm); 1150 retval = mem_cgroup_charge(page, mm, GFP_KERNEL);
1151 if (retval) 1151 if (retval)
1152 goto out; 1152 goto out;
1153 1153
@@ -1650,7 +1650,7 @@ gotten:
1650 cow_user_page(new_page, old_page, address, vma); 1650 cow_user_page(new_page, old_page, address, vma);
1651 __SetPageUptodate(new_page); 1651 __SetPageUptodate(new_page);
1652 1652
1653 if (mem_cgroup_charge(new_page, mm)) 1653 if (mem_cgroup_charge(new_page, mm, GFP_KERNEL))
1654 goto oom_free_new; 1654 goto oom_free_new;
1655 1655
1656 /* 1656 /*
@@ -2052,7 +2052,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
2052 count_vm_event(PGMAJFAULT); 2052 count_vm_event(PGMAJFAULT);
2053 } 2053 }
2054 2054
2055 if (mem_cgroup_charge(page, mm)) { 2055 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2056 delayacct_clear_flag(DELAYACCT_PF_SWAPIN); 2056 delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
2057 ret = VM_FAULT_OOM; 2057 ret = VM_FAULT_OOM;
2058 goto out; 2058 goto out;
@@ -2139,7 +2139,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
2139 goto oom; 2139 goto oom;
2140 __SetPageUptodate(page); 2140 __SetPageUptodate(page);
2141 2141
2142 if (mem_cgroup_charge(page, mm)) 2142 if (mem_cgroup_charge(page, mm, GFP_KERNEL))
2143 goto oom_free_page; 2143 goto oom_free_page;
2144 2144
2145 entry = mk_pte(page, vma->vm_page_prot); 2145 entry = mk_pte(page, vma->vm_page_prot);
@@ -2277,7 +2277,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
2277 2277
2278 } 2278 }
2279 2279
2280 if (mem_cgroup_charge(page, mm)) { 2280 if (mem_cgroup_charge(page, mm, GFP_KERNEL)) {
2281 ret = VM_FAULT_OOM; 2281 ret = VM_FAULT_OOM;
2282 goto out; 2282 goto out;
2283 } 2283 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 417bbda14e5..76379414469 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -153,7 +153,7 @@ static void remove_migration_pte(struct vm_area_struct *vma,
153 return; 153 return;
154 } 154 }
155 155
156 if (mem_cgroup_charge(new, mm)) { 156 if (mem_cgroup_charge(new, mm, GFP_KERNEL)) {
157 pte_unmap(ptep); 157 pte_unmap(ptep);
158 return; 158 return;
159 } 159 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 88258869c8e..581b609e748 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -78,7 +78,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
78 error = radix_tree_preload(gfp_mask); 78 error = radix_tree_preload(gfp_mask);
79 if (!error) { 79 if (!error) {
80 80
81 error = mem_cgroup_cache_charge(page, current->mm); 81 error = mem_cgroup_cache_charge(page, current->mm, gfp_mask);
82 if (error) 82 if (error)
83 goto out; 83 goto out;
84 84
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fddc4cc4149..35e00c3d028 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -510,7 +510,7 @@ unsigned int count_swap_pages(int type, int free)
510static int unuse_pte(struct vm_area_struct *vma, pte_t *pte, 510static int unuse_pte(struct vm_area_struct *vma, pte_t *pte,
511 unsigned long addr, swp_entry_t entry, struct page *page) 511 unsigned long addr, swp_entry_t entry, struct page *page)
512{ 512{
513 if (mem_cgroup_charge(page, vma->vm_mm)) 513 if (mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL))
514 return -ENOMEM; 514 return -ENOMEM;
515 515
516 inc_mm_counter(vma->vm_mm, anon_rss); 516 inc_mm_counter(vma->vm_mm, anon_rss);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 215f6a726b2..b7d868cbca0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1337,16 +1337,11 @@ unsigned long try_to_free_pages(struct zone **zones, int order, gfp_t gfp_mask)
1337 1337
1338#ifdef CONFIG_CGROUP_MEM_CONT 1338#ifdef CONFIG_CGROUP_MEM_CONT
1339 1339
1340#ifdef CONFIG_HIGHMEM 1340unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
1341#define ZONE_USERPAGES ZONE_HIGHMEM 1341 gfp_t gfp_mask)
1342#else
1343#define ZONE_USERPAGES ZONE_NORMAL
1344#endif
1345
1346unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont)
1347{ 1342{
1348 struct scan_control sc = { 1343 struct scan_control sc = {
1349 .gfp_mask = GFP_KERNEL, 1344 .gfp_mask = gfp_mask,
1350 .may_writepage = !laptop_mode, 1345 .may_writepage = !laptop_mode,
1351 .may_swap = 1, 1346 .may_swap = 1,
1352 .swap_cluster_max = SWAP_CLUSTER_MAX, 1347 .swap_cluster_max = SWAP_CLUSTER_MAX,
@@ -1357,9 +1352,10 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont)
1357 }; 1352 };
1358 int node; 1353 int node;
1359 struct zone **zones; 1354 struct zone **zones;
1355 int target_zone = gfp_zone(GFP_HIGHUSER_MOVABLE);
1360 1356
1361 for_each_online_node(node) { 1357 for_each_online_node(node) {
1362 zones = NODE_DATA(node)->node_zonelists[ZONE_USERPAGES].zones; 1358 zones = NODE_DATA(node)->node_zonelists[target_zone].zones;
1363 if (do_try_to_free_pages(zones, sc.gfp_mask, &sc)) 1359 if (do_try_to_free_pages(zones, sc.gfp_mask, &sc))
1364 return 1; 1360 return 1;
1365 } 1361 }