-rw-r--r--   Documentation/cgroups/memory.txt   18
-rw-r--r--   include/linux/swap.h                5
-rw-r--r--   mm/memcontrol.c                    56
-rw-r--r--   mm/shmem.c                         64
4 files changed, 125 insertions, 18 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 44e7ded33448..5e028870ee8a 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -454,21 +454,27 @@ And if you want disable it again:
 8.2 Type of charges which can be move
 
 Each bits of move_charge_at_immigrate has its own meaning about what type of
-charges should be moved.
+charges should be moved. But in any cases, it must be noted that an account of
+a page or a swap can be moved only when it is charged to the task's current(old)
+memory cgroup.
 
  bit | what type of charges would be moved ?
 -----+------------------------------------------------------------------------
   0  | A charge of an anonymous page(or swap of it) used by the target task.
      | Those pages and swaps must be used only by the target task. You must
      | enable Swap Extension(see 2.4) to enable move of swap charges.
-
-Note: Those pages and swaps must be charged to the old cgroup.
-Note: More type of pages(e.g. file cache, shmem,) will be supported by other
-bits in future.
+-----+------------------------------------------------------------------------
+  1  | A charge of file pages(normal file, tmpfs file(e.g. ipc shared memory)
+     | and swaps of tmpfs file) mmaped by the target task. Unlike the case of
+     | anonymous pages, file pages(and swaps) in the range mmapped by the task
+     | will be moved even if the task hasn't done page fault, i.e. they might
+     | not be the task's "RSS", but other task's "RSS" that maps the same file.
+     | And mapcount of the page is ignored(the page can be moved even if
+     | page_mapcount(page) > 1). You must enable Swap Extension(see 2.4) to
+     | enable move of swap charges.
 
 8.3 TODO
 
-- Add support for other types of pages(e.g. file cache, shmem, etc.).
 - Implement madvise(2) to let users decide the vma to be moved or not to be
   moved.
 - All of moving charge operations are done under cgroup_mutex. It's not good
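For context, the knob described above is the per-cgroup memory.move_charge_at_immigrate file. The user-space sketch below is not part of the patch; it only illustrates setting bit 0 (anon) and bit 1 (file) before moving a task, and the mount point /cgroup/memory/grp and pid 1234 are assumptions for the example.

/* Illustrative user-space sketch, not part of this patch.
 * Assumes the memory cgroup v1 controller is mounted at /cgroup/memory
 * and a group "grp" already exists; the pid is a placeholder.
 */
#include <stdio.h>
#include <stdlib.h>

static void write_str(const char *path, const char *val)
{
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                exit(1);
        }
        fprintf(f, "%s", val);
        fclose(f);
}

int main(void)
{
        /* bit 0 (anon) | bit 1 (file) == 3 */
        write_str("/cgroup/memory/grp/memory.move_charge_at_immigrate", "3");
        /* moving a task into "grp" now also migrates its charges */
        write_str("/cgroup/memory/grp/tasks", "1234");
        return 0;
}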
diff --git a/include/linux/swap.h b/include/linux/swap.h
index b6b614364dd8..ff4acea9bbdb 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -282,6 +282,11 @@ extern void kswapd_stop(int nid);
 extern int shmem_unuse(swp_entry_t entry, struct page *page);
 #endif /* CONFIG_MMU */
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
+					struct page **pagep, swp_entry_t *ent);
+#endif
+
 extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *);
 
 #ifdef CONFIG_SWAP
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e5277e8a42a8..be5f478351bd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -250,6 +250,7 @@ struct mem_cgroup {
  */
 enum move_type {
 	MOVE_CHARGE_TYPE_ANON,	/* private anonymous page and swap of it */
+	MOVE_CHARGE_TYPE_FILE,	/* file page(including tmpfs) and swap of it */
 	NR_MOVE_TYPE,
 };
 
@@ -272,6 +273,12 @@ static bool move_anon(void)
 					&mc.to->move_charge_at_immigrate);
 }
 
+static bool move_file(void)
+{
+	return test_bit(MOVE_CHARGE_TYPE_FILE,
+					&mc.to->move_charge_at_immigrate);
+}
+
 /*
  * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
  * limit reclaim to prevent infinite loops, if they ever occur.
@@ -4179,11 +4186,8 @@ static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
 		/* we don't move shared anon */
 		if (!move_anon() || page_mapcount(page) > 2)
 			return NULL;
-	} else
-		/*
-		 * TODO: We don't move charges of file(including shmem/tmpfs)
-		 * pages for now.
-		 */
+	} else if (!move_file())
+		/* we ignore mapcount for file pages */
 		return NULL;
 	if (!get_page_unless_zero(page))
 		return NULL;
@@ -4212,6 +4216,39 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
 	return page;
 }
 
+static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
+			unsigned long addr, pte_t ptent, swp_entry_t *entry)
+{
+	struct page *page = NULL;
+	struct inode *inode;
+	struct address_space *mapping;
+	pgoff_t pgoff;
+
+	if (!vma->vm_file) /* anonymous vma */
+		return NULL;
+	if (!move_file())
+		return NULL;
+
+	inode = vma->vm_file->f_path.dentry->d_inode;
+	mapping = vma->vm_file->f_mapping;
+	if (pte_none(ptent))
+		pgoff = linear_page_index(vma, addr);
+	else /* pte_file(ptent) is true */
+		pgoff = pte_to_pgoff(ptent);
+
+	/* page is moved even if it's not RSS of this task(page-faulted). */
+	if (!mapping_cap_swap_backed(mapping)) { /* normal file */
+		page = find_get_page(mapping, pgoff);
+	} else { /* shmem/tmpfs file. we should take account of swap too. */
+		swp_entry_t ent;
+		mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent);
+		if (do_swap_account)
+			entry->val = ent.val;
+	}
+
+	return page;
+}
+
 static int is_target_pte_for_mc(struct vm_area_struct *vma,
 		unsigned long addr, pte_t ptent, union mc_target *target)
 {
@@ -4224,7 +4261,8 @@ static int is_target_pte_for_mc(struct vm_area_struct *vma,
 		page = mc_handle_present_pte(vma, addr, ptent);
 	else if (is_swap_pte(ptent))
 		page = mc_handle_swap_pte(vma, addr, ptent, &ent);
-	/* TODO: handle swap of shmes/tmpfs */
+	else if (pte_none(ptent) || pte_file(ptent))
+		page = mc_handle_file_pte(vma, addr, ptent, &ent);
 
 	if (!page && !ent.val)
 		return 0;
@@ -4285,9 +4323,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
 		};
 		if (is_vm_hugetlb_page(vma))
 			continue;
-		/* TODO: We don't move charges of shmem/tmpfs pages for now. */
-		if (vma->vm_flags & VM_SHARED)
-			continue;
 		walk_page_range(vma->vm_start, vma->vm_end,
 					&mem_cgroup_count_precharge_walk);
 	}
@@ -4484,9 +4519,6 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 		};
 		if (is_vm_hugetlb_page(vma))
 			continue;
-		/* TODO: We don't move charges of shmem/tmpfs pages for now. */
-		if (vma->vm_flags & VM_SHARED)
-			continue;
 		ret = walk_page_range(vma->vm_start, vma->vm_end,
 						&mem_cgroup_move_charge_walk);
 		if (ret)
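The key point in mc_handle_file_pte() above is that for a pte_none() entry the file page is located from the vma and the virtual address alone, which is what the kernel's linear_page_index() computes; this is why pages that were never faulted in by the task ("not the task's RSS") can still be found and moved. The stand-alone sketch below only reproduces that arithmetic for illustration; PAGE_SHIFT = 12, the struct name and the example values are assumptions, not kernel code.

/* Minimal sketch of the pgoff arithmetic used by linear_page_index();
 * constants and names here are illustrative only.
 */
#include <stdio.h>

#define PAGE_SHIFT 12                   /* assume 4 KiB pages */

struct vma_sketch {
        unsigned long vm_start;         /* start of the mapping */
        unsigned long vm_pgoff;         /* file offset of vm_start, in pages */
};

/* file page index backing addr, whether or not it was ever faulted in */
static unsigned long linear_page_index_sketch(const struct vma_sketch *vma,
                                              unsigned long addr)
{
        return ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
}

int main(void)
{
        struct vma_sketch vma = { .vm_start = 0x700000000000UL, .vm_pgoff = 16 };

        /* third page of the mapping -> file page 16 + 2 = 18 */
        printf("pgoff = %lu\n",
               linear_page_index_sketch(&vma, vma.vm_start + 2 * 4096));
        return 0;
}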
diff --git a/mm/shmem.c b/mm/shmem.c
index 4ef9797bd430..855eaf5b8d5b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2559,6 +2559,45 @@ out4:
 	return error;
 }
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+/**
+ * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
+ * @inode: the inode to be searched
+ * @pgoff: the offset to be searched
+ * @pagep: the pointer for the found page to be stored
+ * @ent: the pointer for the found swap entry to be stored
+ *
+ * If a page is found, refcount of it is incremented. Callers should handle
+ * these refcount.
+ */
+void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
+					struct page **pagep, swp_entry_t *ent)
+{
+	swp_entry_t entry = { .val = 0 }, *ptr;
+	struct page *page = NULL;
+	struct shmem_inode_info *info = SHMEM_I(inode);
+
+	if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
+		goto out;
+
+	spin_lock(&info->lock);
+	ptr = shmem_swp_entry(info, pgoff, NULL);
+#ifdef CONFIG_SWAP
+	if (ptr && ptr->val) {
+		entry.val = ptr->val;
+		page = find_get_page(&swapper_space, entry.val);
+	} else
+#endif
+		page = find_get_page(inode->i_mapping, pgoff);
+	if (ptr)
+		shmem_swp_unmap(ptr);
+	spin_unlock(&info->lock);
+out:
+	*pagep = page;
+	*ent = entry;
+}
+#endif
+
 #else /* !CONFIG_SHMEM */
 
 /*
@@ -2598,6 +2637,31 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
 	return 0;
 }
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+/**
+ * mem_cgroup_get_shmem_target - find a page or entry assigned to the shmem file
+ * @inode: the inode to be searched
+ * @pgoff: the offset to be searched
+ * @pagep: the pointer for the found page to be stored
+ * @ent: the pointer for the found swap entry to be stored
+ *
+ * If a page is found, refcount of it is incremented. Callers should handle
+ * these refcount.
+ */
+void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff,
+					struct page **pagep, swp_entry_t *ent)
+{
+	struct page *page = NULL;
+
+	if ((pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode))
+		goto out;
+	page = find_get_page(inode->i_mapping, pgoff);
+out:
+	*pagep = page;
+	*ent = (swp_entry_t){ .val = 0 };
+}
+#endif
+
 #define shmem_vm_ops				generic_file_vm_ops
 #define shmem_file_operations			ramfs_file_operations
 #define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
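As the kernel-doc above notes, the helper may return a referenced page, and the caller must drop that reference. Below is a hedged kernel-context sketch of such a caller, modelled on mc_handle_file_pte() in mm/memcontrol.c; it is not part of the patch, the function name inspect_shmem_slot() is invented for illustration, and it presumes CONFIG_CGROUP_MEM_RES_CTLR is enabled so the helper is available.

/* Illustrative caller sketch, not part of this patch: balancing the
 * reference that mem_cgroup_get_shmem_target() may take.
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/swap.h>

static void inspect_shmem_slot(struct inode *inode, pgoff_t pgoff)
{
	struct page *page;
	swp_entry_t ent;

	mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent);

	if (page) {
		/* page cache (or swap cache) page: we now own one reference */
		/* ... examine the page here ... */
		put_page(page);
	} else if (ent.val) {
		/* slot is on swap and not in swap cache: only the entry is reported */
	}
}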