author     Hugh Dickins <hugh@veritas.com>                        2008-02-07 03:14:22 -0500
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2008-02-07 11:42:20 -0500
commit     82369553d6d3bc67c54129a02e0bc0b5b88f3045 (patch)
tree       1d80a6cc9f5840550ad025b32ac8ef8fd915fd98 /mm
parent     3be91277e754c7db04eae145ba622b3a3e3ad96d (diff)
memcgroup: fix hang with shmem/tmpfs
The memcgroup regime relies upon a cgroup reclaiming pages from itself within add_to_page_cache: which may involve some waiting. Whereas shmem and tmpfs rely upon using add_to_page_cache while holding a spinlock: when it cannot wait. The consequence is that when a cgroup reaches its limit, shmem_getpage just hangs - unless there is outside memory pressure too, neither kswapd nor radix_tree_preload get it out of the retry loop.

In most cases we can mem_cgroup_cache_charge the page waitably first, to attach the page_cgroup in advance, so add_to_page_cache will do no more than increment a count; then mem_cgroup_uncharge_page after (in both success and failure cases) to balance the books again.

And where there used to be a congestion_wait for kswapd (recently made redundant by radix_tree_preload), use mem_cgroup_cache_charge with NULL page to go through a cycle of allocation and freeing, without accounting to any particular page, and without updating the statistics vector. This brings the cgroup below its limit so the next try usually succeeds.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
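The ordering the message describes can be condensed into a rough sketch. This is not code from the patch: the helper name and argument list below are invented for illustration and the error unwinding is pared down; only the mem_cgroup_cache_charge, add_to_page_cache_lru and mem_cgroup_uncharge_page calls mirror the shmem.c hunks that follow.

/*
 * Hypothetical sketch of the charge-ahead pattern; not from the patch.
 * Function name and arguments are invented, unwinding is abbreviated.
 */
static int shmem_cache_sketch(struct page *filepage,
		struct address_space *mapping, pgoff_t idx, gfp_t gfp,
		struct shmem_inode_info *info)
{
	int error;

	/* 1. Charge while we may still sleep: cgroup reclaim can run here */
	error = mem_cgroup_cache_charge(filepage, current->mm,
					gfp & ~__GFP_HIGHMEM);
	if (error)
		return error;

	/*
	 * 2. Under info->lock we cannot wait; but the page_cgroup is
	 * already attached, so the charge inside add_to_page_cache_lru
	 * does no more than take another reference on it.
	 */
	spin_lock(&info->lock);
	error = add_to_page_cache_lru(filepage, mapping, idx, GFP_NOWAIT);
	spin_unlock(&info->lock);

	/* 3. Compensate for the precharge, on success and failure alike */
	mem_cgroup_uncharge_page(filepage);
	return error;
}

The NULL-page variant from the third paragraph is the same charge call with no page attached, mem_cgroup_cache_charge(NULL, current->mm, gfp & ~__GFP_HIGHMEM): it charges within the cgroup's limit (reclaiming from the cgroup if need be) and frees straight away, so the group is usually back under its limit before the retry.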
Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c  37
-rw-r--r--  mm/shmem.c       28
2 files changed, 48 insertions, 17 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index dbf571547c03..11b23f203d68 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -329,23 +329,26 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 	 * with it
 	 */
 retry:
-	lock_page_cgroup(page);
-	pc = page_get_page_cgroup(page);
-	/*
-	 * The page_cgroup exists and the page has already been accounted
-	 */
-	if (pc) {
-		if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
-			/* this page is under being uncharged ? */
-			unlock_page_cgroup(page);
-			cpu_relax();
-			goto retry;
-		} else {
-			unlock_page_cgroup(page);
-			goto done;
+	if (page) {
+		lock_page_cgroup(page);
+		pc = page_get_page_cgroup(page);
+		/*
+		 * The page_cgroup exists and
+		 * the page has already been accounted.
+		 */
+		if (pc) {
+			if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
+				/* this page is under being uncharged ? */
+				unlock_page_cgroup(page);
+				cpu_relax();
+				goto retry;
+			} else {
+				unlock_page_cgroup(page);
+				goto done;
+			}
 		}
+		unlock_page_cgroup(page);
 	}
-	unlock_page_cgroup(page);
 
 	pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
 	if (pc == NULL)
@@ -404,7 +407,7 @@ retry:
 	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
 		pc->flags |= PAGE_CGROUP_FLAG_CACHE;
 
-	if (page_cgroup_assign_new_page_cgroup(page, pc)) {
+	if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) {
 		/*
 		 * Another charge has been added to this page already.
 		 * We take lock_page_cgroup(page) again and read
@@ -413,6 +416,8 @@ retry:
 		res_counter_uncharge(&mem->res, PAGE_SIZE);
 		css_put(&mem->css);
 		kfree(pc);
+		if (!page)
+			goto done;
 		goto retry;
 	}
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 0f246c44a574..85bed948fafc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -912,9 +912,13 @@ found:
 	error = 1;
 	if (!inode)
 		goto out;
-	error = radix_tree_preload(GFP_KERNEL);
+	/* Precharge page while we can wait, compensate afterwards */
+	error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
 	if (error)
 		goto out;
+	error = radix_tree_preload(GFP_KERNEL);
+	if (error)
+		goto uncharge;
 	error = 1;
 
 	spin_lock(&info->lock);
@@ -947,6 +951,8 @@ found:
 	shmem_swp_unmap(ptr);
 	spin_unlock(&info->lock);
 	radix_tree_preload_end();
+uncharge:
+	mem_cgroup_uncharge_page(page);
 out:
 	unlock_page(page);
 	page_cache_release(page);
@@ -1308,6 +1314,13 @@ repeat:
 			spin_unlock(&info->lock);
 			unlock_page(swappage);
 			page_cache_release(swappage);
+			if (error == -ENOMEM) {
+				/* allow reclaim from this memory cgroup */
+				error = mem_cgroup_cache_charge(NULL,
+					current->mm, gfp & ~__GFP_HIGHMEM);
+				if (error)
+					goto failed;
+			}
 			goto repeat;
 		}
 	} else if (sgp == SGP_READ && !filepage) {
@@ -1353,6 +1366,17 @@ repeat:
 			goto failed;
 		}
 
+		/* Precharge page while we can wait, compensate after */
+		error = mem_cgroup_cache_charge(filepage, current->mm,
+						gfp & ~__GFP_HIGHMEM);
+		if (error) {
+			page_cache_release(filepage);
+			shmem_unacct_blocks(info->flags, 1);
+			shmem_free_blocks(inode, 1);
+			filepage = NULL;
+			goto failed;
+		}
+
 		spin_lock(&info->lock);
 		entry = shmem_swp_alloc(info, idx, sgp);
 		if (IS_ERR(entry))
@@ -1364,6 +1388,7 @@ repeat:
 		if (error || swap.val || 0 != add_to_page_cache_lru(
 				filepage, mapping, idx, GFP_NOWAIT)) {
 			spin_unlock(&info->lock);
+			mem_cgroup_uncharge_page(filepage);
 			page_cache_release(filepage);
 			shmem_unacct_blocks(info->flags, 1);
 			shmem_free_blocks(inode, 1);
@@ -1372,6 +1397,7 @@ repeat:
 			goto failed;
 			goto repeat;
 		}
+		mem_cgroup_uncharge_page(filepage);
 		info->flags |= SHMEM_PAGEIN;
 	}
 