aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> 2010-05-26 17:42:39 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-05-27 12:12:43 -0400
commit: 87946a72283be3de936adc754b7007df7d3e6aeb (patch)
tree: 0593c87ba36bae13d6a6d5dda65ebb41354954f8
parent: 90254a65833b67502d14736410b3857a15535c67 (diff)
memcg: move charge of file pages
This patch adds support for moving the charge of file pages, which include normal files, tmpfs files, and swap of tmpfs files. It is enabled by setting bit 1 of <target cgroup>/memory.move_charge_at_immigrate.

Unlike the case of anonymous pages, file pages (and swaps) in the range mmapped by the task will be moved even if the task hasn't done a page fault, i.e. they might not be the task's "RSS", but another task's "RSS" that maps the same file. The mapcount of the page is also ignored (the page can be moved even if page_mapcount(page) > 1).

So, the conditions that a page/swap must meet in order to be moved are that it must be in the range mmapped by the target task and it must be charged to the old cgroup.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix warning]
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- Documentation/cgroups/memory.txt | 18
-rw-r--r-- include/linux/swap.h | 5
-rw-r--r-- mm/memcontrol.c | 56
-rw-r--r-- mm/shmem.c | 64
4 files changed, 125 insertions, 18 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 44e7ded33448..5e028870ee8a 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -454,21 +454,27 @@ And if you want disable it again:
4548.2 Type of charges which can be move 4548.2 Type of charges which can be move
455 455
456Each bits of move_charge_at_immigrate has its own meaning about what type of 456Each bits of move_charge_at_immigrate has its own meaning about what type of
457charges should be moved. 457charges should be moved. But in any cases, it must be noted that an account of
458a page or a swap can be moved only when it is charged to the task's current(old)
459memory cgroup.
458 460
459 bit | what type of charges would be moved ? 461 bit | what type of charges would be moved ?
460 -----+------------------------------------------------------------------------ 462 -----+------------------------------------------------------------------------
461 0 | A charge of an anonymous page(or swap of it) used by the target task. 463 0 | A charge of an anonymous page(or swap of it) used by the target task.
462 | Those pages and swaps must be used only by the target task. You must 464 | Those pages and swaps must be used only by the target task. You must
463 | enable Swap Extension(see 2.4) to enable move of swap charges. 465 | enable Swap Extension(see 2.4) to enable move of swap charges.
464 466 -----+------------------------------------------------------------------------
465Note: Those pages and swaps must be charged to the old cgroup. 467 1 | A charge of file pages(normal file, tmpfs file(e.g. ipc shared memory)
466Note: More type of pages(e.g. file cache, shmem,) will be supported by other 468 | and swaps of tmpfs file) mmaped by the target task. Unlike the case of
467 bits in future. 469 | anonymous pages, file pages(and swaps) in the range mmapped by the task
470 | will be moved even if the task hasn't done page fault, i.e. they might
471 | not be the task's "RSS", but other task's "RSS" that maps the same file.
472 | And mapcount of the page is ignored(the page can be moved even if
473 | page_mapcount(page) > 1). You must enable Swap Extension(see 2.4) to
474 | enable move of swap charges.
468 475
4698.3 TODO 4768.3 TODO
470 477
471- Add support for other types of pages(e.g. file cache, shmem, etc.).
472- Implement madvise(2) to let users decide the vma to be moved or not to be 478- Implement madvise(2) to let users decide the vma to be moved or not to be
473 moved. 479 moved.
474- All of moving charge operations are done under cgroup_mutex. It's not good 480- All of moving charge operations are done under cgroup_mutex. It's not good
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b6b614364dd8..ff4acea9bbdb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -282,6 +282,11 @@ extern void kswapd_stop(int nid);
282extern int shmem_unuse(swp_entry_t entry, struct page *page); 282extern int shmem_unuse(swp_entry_t entry, struct page *page);
283#endif /* CONFIG_MMU */ 283#endif /* CONFIG_MMU */
284 284
285#ifdef CONFIG_CGROUP_MEM_RES_CTLR
286extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
287 struct page **pagep, swp_entry_t *ent);
288#endif
289
285extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *); 290extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
286 291
287#ifdef CONFIG_SWAP 292#ifdef CONFIG_SWAP
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e5277e8a42a8..be5f478351bd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -250,6 +250,7 @@ struct mem_cgroup {
250 */ 250 */
251enum move_type { 251enum move_type {
252 MOVE_CHARGE_TYPE_ANON, /* private anonymous page and swap of it */ 252 MOVE_CHARGE_TYPE_ANON, /* private anonymous page and swap of it */
253 MOVE_CHARGE_TYPE_FILE, /* file page(including tmpfs) and swap of it */
253 NR_MOVE_TYPE, 254 NR_MOVE_TYPE,
254}; 255};
255 256
@@ -272,6 +273,12 @@ static bool move_anon(void)
272 &mc.to->move_charge_at_immigrate); 273 &mc.to->move_charge_at_immigrate);
273} 274}
274 275
276static bool move_file(void)
277{
278 return test_bit(MOVE_CHARGE_TYPE_FILE,
279 &mc.to->move_charge_at_immigrate);
280}
281
275/* 282/*
276 * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft 283 * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
277 * limit reclaim to prevent infinite loops, if they ever occur. 284 * limit reclaim to prevent infinite loops, if they ever occur.
@@ -4179,11 +4186,8 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
4179 /* we don't move shared anon */ 4186 /* we don't move shared anon */
4180 if (!move_anon() || page_mapcount(page) > 2) 4187 if (!move_anon() || page_mapcount(page) > 2)
4181 return NULL; 4188 return NULL;
4182 } else 4189 } else if (!move_file())
4183 /* 4190 /* we ignore mapcount for file pages */
4184 * TODO: We don't move charges of file(including shmem/tmpfs)
4185 * pages for now.
4186 */
4187 return NULL; 4191 return NULL;
4188 if (!get_page_unless_zero(page)) 4192 if (!get_page_unless_zero(page))
4189 return NULL; 4193 return NULL;
@@ -4212,6 +4216,39 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
4212 return page; 4216 return page;
4213} 4217}
4214 4218
4219static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
4220 unsigned long addr, pte_t ptent, swp_entry_t *entry)
4221{
4222 struct page *page = NULL;
4223 struct inode *inode;
4224 struct address_space *mapping;
4225 pgoff_t pgoff;
4226
4227 if (!vma->vm_file) /* anonymous vma */
4228 return NULL;
4229 if (!move_file())
4230 return NULL;
4231
4232 inode = vma->vm_file->f_path.dentry->d_inode;
4233 mapping = vma->vm_file->f_mapping;
4234 if (pte_none(ptent))
4235 pgoff = linear_page_index(vma, addr);
4236 else /* pte_file(ptent) is true */
4237 pgoff = pte_to_pgoff(ptent);
4238
4239 /* page is moved even if it's not RSS of this task(page-faulted). */
4240 if (!mapping_cap_swap_backed(mapping)) { /* normal file */
4241 page = find_get_page(mapping, pgoff);
4242 } else { /* shmem/tmpfs file. we should take account of swap too. */
4243 swp_entry_t ent;
4244 mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent);
4245 if (do_swap_account)
4246 entry->val = ent.val;
4247 }
4248
4249 return page;
4250}
4251
4215static int is_target_pte_for_mc(struct vm_area_struct *vma, 4252static int is_target_pte_for_mc(struct vm_area_struct *vma,
4216 unsigned long addr, pte_t ptent, union mc_target *target) 4253 unsigned long addr, pte_t ptent, union mc_target *target)
4217{ 4254{
@@ -4224,7 +4261,8 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma,
4224 page = mc_handle_present_pte(vma, addr, ptent); 4261 page = mc_handle_present_pte(vma, addr, ptent);
4225 else if (is_swap_pte(ptent)) 4262 else if (is_swap_pte(ptent))
4226 page = mc_handle_swap_pte(vma, addr, ptent, &ent); 4263 page = mc_handle_swap_pte(vma, addr, ptent, &ent);
4227 /* TODO: handle swap of shmes/tmpfs */ 4264 else if (pte_none(ptent) || pte_file(ptent))
4265 page = mc_handle_file_pte(vma, addr, ptent, &ent);
4228 4266
4229 if (!page && !ent.val) 4267 if (!page && !ent.val)
4230 return 0; 4268 return 0;
@@ -4285,9 +4323,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
4285 }; 4323 };
4286 if (is_vm_hugetlb_page(vma)) 4324 if (is_vm_hugetlb_page(vma))
4287 continue; 4325 continue;
4288 /* TODO: We don't move charges of shmem/tmpfs pages for now. */
4289 if (vma->vm_flags & VM_SHARED)
4290 continue;
4291 walk_page_range(vma->vm_start, vma->vm_end, 4326 walk_page_range(vma->vm_start, vma->vm_end,
4292 &mem_cgroup_count_precharge_walk); 4327 &mem_cgroup_count_precharge_walk);
4293 } 4328 }
@@ -4484,9 +4519,6 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
4484 }; 4519 };
4485 if (is_vm_hugetlb_page(vma)) 4520 if (is_vm_hugetlb_page(vma))
4486 continue; 4521 continue;
4487 /* TODO: We don't move charges of shmem/tmpfs pages for now. */
4488 if (vma->vm_flags & VM_SHARED)
4489 continue;
4490 ret = walk_page_range(vma->vm_start, vma->vm_end, 4522 ret = walk_page_range(vma->vm_start, vma->vm_end,
4491 &mem_cgroup_move_charge_walk); 4523 &mem_cgroup_move_charge_walk);
4492 if (ret) 4524 if (ret)
diff --git a/mm/shmem.c b/mm/shmem.c
index 4ef9797bd430..855eaf5b8d5b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2559,6 +2559,45 @@ out4:
2559 return error; 2559 return error;
2560} 2560}
2561 2561
2562#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2563/**
2564 * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
2565 * @inode: the inode to be searched
2566 * @pgoff: the offset to be searched
2567 * @pagep: the pointer for the found page to be stored
2568 * @ent: the pointer for the found swap entry to be stored
2569 *
2570 * If a page is found, refcount of it is incremented. Callers should handle
2571 * these refcount.
2572 */
2573void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
2574 struct page **pagep, swp_entry_t *ent)
2575{
2576 swp_entry_t entry = { .val = 0 }, *ptr;
2577 struct page *page = NULL;
2578 struct shmem_inode_info *info = SHMEM_I(inode);
2579
2580 if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
2581 goto out;
2582
2583 spin_lock(&info->lock);
2584 ptr = shmem_swp_entry(info, pgoff, NULL);
2585#ifdef CONFIG_SWAP
2586 if (ptr && ptr->val) {
2587 entry.val = ptr->val;
2588 page = find_get_page(&swapper_space, entry.val);
2589 } else
2590#endif
2591 page = find_get_page(inode->i_mapping, pgoff);
2592 if (ptr)
2593 shmem_swp_unmap(ptr);
2594 spin_unlock(&info->lock);
2595out:
2596 *pagep = page;
2597 *ent = entry;
2598}
2599#endif
2600
2562#else /* !CONFIG_SHMEM */ 2601#else /* !CONFIG_SHMEM */
2563 2602
2564/* 2603/*
@@ -2598,6 +2637,31 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
2598 return 0; 2637 return 0;
2599} 2638}
2600 2639
2640#ifdef CONFIG_CGROUP_MEM_RES_CTLR
2641/**
2642 * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
2643 * @inode: the inode to be searched
2644 * @pgoff: the offset to be searched
2645 * @pagep: the pointer for the found page to be stored
2646 * @ent: the pointer for the found swap entry to be stored
2647 *
2648 * If a page is found, refcount of it is incremented. Callers should handle
2649 * these refcount.
2650 */
2651void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
2652 struct page **pagep, swp_entry_t *ent)
2653{
2654 struct page *page = NULL;
2655
2656 if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
2657 goto out;
2658 page = find_get_page(inode->i_mapping, pgoff);
2659out:
2660 *pagep = page;
2661 *ent = (swp_entry_t){ .val = 0 };
2662}
2663#endif
2664
2601#define shmem_vm_ops generic_file_vm_ops 2665#define shmem_vm_ops generic_file_vm_ops
2602#define shmem_file_operations ramfs_file_operations 2666#define shmem_file_operations ramfs_file_operations
2603#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) 2667#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)