 Documentation/cgroups/memcg_test.txt |  32
 include/linux/memcontrol.h           |  53
 include/linux/swap.h                 |   3
 kernel/events/uprobes.c              |  15
 mm/filemap.c                         |  21
 mm/huge_memory.c                     |  57
 mm/memcontrol.c                      | 407
 mm/memory.c                          |  41
 mm/rmap.c                            |  19
 mm/shmem.c                           |  37
 mm/swap.c                            |  34
 mm/swapfile.c                        |  14
 12 files changed, 338 insertions(+), 395 deletions(-)
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
index 80ac454704b8..bcf750d3cecd 100644
--- a/Documentation/cgroups/memcg_test.txt
+++ b/Documentation/cgroups/memcg_test.txt
@@ -24,24 +24,7 @@ Please note that implementation details can be changed.
 
 	a page/swp_entry may be charged (usage += PAGE_SIZE) at
 
-	mem_cgroup_charge_anon()
-	  Called at new page fault and Copy-On-Write.
-
-	mem_cgroup_try_charge_swapin()
-	  Called at do_swap_page() (page fault on swap entry) and swapoff.
-	  Followed by charge-commit-cancel protocol. (With swap accounting)
-	  At commit, a charge recorded in swap_cgroup is removed.
-
-	mem_cgroup_charge_file()
-	  Called at add_to_page_cache()
-
-	mem_cgroup_cache_charge_swapin()
-	  Called at shmem's swapin.
-
-	mem_cgroup_prepare_migration()
-	  Called before migration. "extra" charge is done and followed by
-	  charge-commit-cancel protocol.
-	  At commit, charge against oldpage or newpage will be committed.
+	mem_cgroup_try_charge()
 
 2. Uncharge
 	a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
@@ -69,19 +52,14 @@ Please note that implementation details can be changed.
 	to new page is committed. At failure, charge to old page is committed.
 
 3. charge-commit-cancel
-	In some case, we can't know this "charge" is valid or not at charging
-	(because of races).
-	To handle such case, there are charge-commit-cancel functions.
-		mem_cgroup_try_charge_XXX
-		mem_cgroup_commit_charge_XXX
-		mem_cgroup_cancel_charge_XXX
-	these are used in swap-in and migration.
+	Memcg pages are charged in two steps:
+		mem_cgroup_try_charge()
+		mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
 
 	At try_charge(), there are no flags to say "this page is charged".
 	at this point, usage += PAGE_SIZE.
 
-	At commit(), the function checks the page should be charged or not
-	and set flags or avoid charging.(usage -= PAGE_SIZE)
+	At commit(), the page is associated with the memcg.
 
 	At cancel(), simply usage -= PAGE_SIZE.
 
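The new protocol maps onto callers as one fixed pattern: try_charge before the page becomes visible to anyone else, commit once the rmap or page-cache entry is in place, cancel on any failure path in between. A condensed caller-side sketch, modeled on the do_anonymous_page() changes in the mm/memory.c hunks further down (locking, pte setup and most error handling omitted):

	struct mem_cgroup *memcg;

	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))	/* usage += PAGE_SIZE */
		goto oom_free_page;
	...
	page_add_new_anon_rmap(page, vma, address);	/* page->mapping is now set up */
	mem_cgroup_commit_charge(page, memcg, false);	/* bind the page to the memcg */
	lru_cache_add_active_or_unevictable(page, vma);
	...
release:
	mem_cgroup_cancel_charge(page, memcg);		/* unwind: usage -= PAGE_SIZE */
	page_cache_release(page);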
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index eb65d29516ca..1a9a096858e0 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,28 +54,11 @@ struct mem_cgroup_reclaim_cookie {
 };
 
 #ifdef CONFIG_MEMCG
-/*
- * All "charge" functions with gfp_mask should use GFP_KERNEL or
- * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't
- * alloc memory but reclaims memory from all available zones. So, "where I want
- * memory from" bits of gfp_mask has no meaning. So any bits of that field is
- * available but adding a rule is better. charge functions' gfp_mask should
- * be set to GFP_KERNEL or gfp_mask & GFP_RECLAIM_MASK for avoiding ambiguous
- * codes.
- * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.)
- */
-
-extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm,
-				gfp_t gfp_mask);
-/* for swap handling */
-extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t mask, struct mem_cgroup **memcgp);
-extern void mem_cgroup_commit_charge_swapin(struct page *page,
-					struct mem_cgroup *memcg);
-extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg);
-
-extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
-					gfp_t gfp_mask);
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp);
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+			      bool lrucare);
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
 
 struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
 struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
@@ -233,30 +216,22 @@ void mem_cgroup_print_bad_page(struct page *page);
 #else /* CONFIG_MEMCG */
 struct mem_cgroup;
 
-static inline int mem_cgroup_charge_anon(struct page *page,
-					struct mm_struct *mm, gfp_t gfp_mask)
-{
-	return 0;
-}
-
-static inline int mem_cgroup_charge_file(struct page *page,
-					struct mm_struct *mm, gfp_t gfp_mask)
-{
-	return 0;
-}
-
-static inline int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-		struct page *page, gfp_t gfp_mask, struct mem_cgroup **memcgp)
+static inline int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+					gfp_t gfp_mask,
+					struct mem_cgroup **memcgp)
 {
+	*memcgp = NULL;
 	return 0;
 }
 
-static inline void mem_cgroup_commit_charge_swapin(struct page *page,
-					struct mem_cgroup *memcg)
+static inline void mem_cgroup_commit_charge(struct page *page,
+					    struct mem_cgroup *memcg,
+					    bool lrucare)
 {
 }
 
-static inline void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
+static inline void mem_cgroup_cancel_charge(struct page *page,
+					    struct mem_cgroup *memcg)
 {
 }
 
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 1eb64043c076..46a649e4e8cd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -320,6 +320,9 @@ extern void swap_setup(void);
 
 extern void add_page_to_unevictable_list(struct page *page);
 
+extern void lru_cache_add_active_or_unevictable(struct page *page,
+						struct vm_area_struct *vma);
+
 /* linux/mm/vmscan.c */
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
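The new lru_cache_add_active_or_unevictable() helper absorbs the LRU placement that page_add_new_anon_rmap() used to do itself (see the logic being deleted from mm/rmap.c at the end of this diff), so rmap setup and LRU insertion can be separated by the charge commit. The mm/swap.c implementation is not quoted here; the sketch below is only an approximation reconstructed from the removed rmap code:

	void lru_cache_add_active_or_unevictable(struct page *page,
						 struct vm_area_struct *vma)
	{
		VM_BUG_ON_PAGE(PageLRU(page), page);

		if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) {
			SetPageActive(page);
			lru_cache_add(page);
			return;
		}

		if (!TestSetPageMlocked(page)) {
			/*
			 * The irq-unsafe variant is fine: this counter is not
			 * modified from interrupt context, and the pte lock is
			 * held, which implies preemption disabled.
			 */
			__mod_zone_page_state(page_zone(page), NR_MLOCK,
					      hpage_nr_pages(page));
			count_vm_event(UNEVICTABLE_PGMLOCKED);
		}
		add_page_to_unevictable_list(page);
	}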
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6f3254e8c137..1d0af8a2c646 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -167,6 +167,11 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 	/* For mmu_notifiers */
 	const unsigned long mmun_start = addr;
 	const unsigned long mmun_end = addr + PAGE_SIZE;
+	struct mem_cgroup *memcg;
+
+	err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg);
+	if (err)
+		return err;
 
 	/* For try_to_free_swap() and munlock_vma_page() below */
 	lock_page(page);
@@ -179,6 +184,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	get_page(kpage);
 	page_add_new_anon_rmap(kpage, vma, addr);
+	mem_cgroup_commit_charge(kpage, memcg, false);
+	lru_cache_add_active_or_unevictable(kpage, vma);
 
 	if (!PageAnon(page)) {
 		dec_mm_counter(mm, MM_FILEPAGES);
@@ -200,6 +207,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 
 	err = 0;
  unlock:
+	mem_cgroup_cancel_charge(kpage, memcg);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	unlock_page(page);
 	return err;
@@ -315,18 +323,11 @@ retry:
 	if (!new_page)
 		goto put_old;
 
-	if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL))
-		goto put_new;
-
 	__SetPageUptodate(new_page);
 	copy_highpage(new_page, old_page);
 	copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
 
 	ret = __replace_page(vma, vaddr, old_page, new_page);
-	if (ret)
-		mem_cgroup_uncharge_page(new_page);
-
-put_new:
 	page_cache_release(new_page);
 put_old:
 	put_page(old_page);
diff --git a/mm/filemap.c b/mm/filemap.c
index af19a6b079f5..349a40e35545 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -31,6 +31,7 @@
 #include <linux/security.h>
 #include <linux/cpuset.h>
 #include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
+#include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
 #include <linux/cleancache.h>
 #include <linux/rmap.h>
@@ -548,19 +549,24 @@ static int __add_to_page_cache_locked(struct page *page,
 				      pgoff_t offset, gfp_t gfp_mask,
 				      void **shadowp)
 {
+	int huge = PageHuge(page);
+	struct mem_cgroup *memcg;
 	int error;
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageSwapBacked(page), page);
 
-	error = mem_cgroup_charge_file(page, current->mm,
-					gfp_mask & GFP_RECLAIM_MASK);
-	if (error)
-		return error;
+	if (!huge) {
+		error = mem_cgroup_try_charge(page, current->mm,
+					      gfp_mask, &memcg);
+		if (error)
+			return error;
+	}
 
 	error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
 	if (error) {
-		mem_cgroup_uncharge_cache_page(page);
+		if (!huge)
+			mem_cgroup_cancel_charge(page, memcg);
 		return error;
 	}
 
@@ -575,13 +581,16 @@ static int __add_to_page_cache_locked(struct page *page,
 		goto err_insert;
 	__inc_zone_page_state(page, NR_FILE_PAGES);
 	spin_unlock_irq(&mapping->tree_lock);
+	if (!huge)
+		mem_cgroup_commit_charge(page, memcg, false);
 	trace_mm_filemap_add_to_page_cache(page);
 	return 0;
 err_insert:
 	page->mapping = NULL;
 	/* Leave page->index set: truncation relies upon it */
 	spin_unlock_irq(&mapping->tree_lock);
-	mem_cgroup_uncharge_cache_page(page);
+	if (!huge)
+		mem_cgroup_cancel_charge(page, memcg);
 	page_cache_release(page);
 	return error;
 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3630d577e987..d9a21d06b862 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -715,13 +715,20 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, | |||
715 | unsigned long haddr, pmd_t *pmd, | 715 | unsigned long haddr, pmd_t *pmd, |
716 | struct page *page) | 716 | struct page *page) |
717 | { | 717 | { |
718 | struct mem_cgroup *memcg; | ||
718 | pgtable_t pgtable; | 719 | pgtable_t pgtable; |
719 | spinlock_t *ptl; | 720 | spinlock_t *ptl; |
720 | 721 | ||
721 | VM_BUG_ON_PAGE(!PageCompound(page), page); | 722 | VM_BUG_ON_PAGE(!PageCompound(page), page); |
723 | |||
724 | if (mem_cgroup_try_charge(page, mm, GFP_TRANSHUGE, &memcg)) | ||
725 | return VM_FAULT_OOM; | ||
726 | |||
722 | pgtable = pte_alloc_one(mm, haddr); | 727 | pgtable = pte_alloc_one(mm, haddr); |
723 | if (unlikely(!pgtable)) | 728 | if (unlikely(!pgtable)) { |
729 | mem_cgroup_cancel_charge(page, memcg); | ||
724 | return VM_FAULT_OOM; | 730 | return VM_FAULT_OOM; |
731 | } | ||
725 | 732 | ||
726 | clear_huge_page(page, haddr, HPAGE_PMD_NR); | 733 | clear_huge_page(page, haddr, HPAGE_PMD_NR); |
727 | /* | 734 | /* |
@@ -734,7 +741,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, | |||
734 | ptl = pmd_lock(mm, pmd); | 741 | ptl = pmd_lock(mm, pmd); |
735 | if (unlikely(!pmd_none(*pmd))) { | 742 | if (unlikely(!pmd_none(*pmd))) { |
736 | spin_unlock(ptl); | 743 | spin_unlock(ptl); |
737 | mem_cgroup_uncharge_page(page); | 744 | mem_cgroup_cancel_charge(page, memcg); |
738 | put_page(page); | 745 | put_page(page); |
739 | pte_free(mm, pgtable); | 746 | pte_free(mm, pgtable); |
740 | } else { | 747 | } else { |
@@ -742,6 +749,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, | |||
742 | entry = mk_huge_pmd(page, vma->vm_page_prot); | 749 | entry = mk_huge_pmd(page, vma->vm_page_prot); |
743 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); | 750 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); |
744 | page_add_new_anon_rmap(page, vma, haddr); | 751 | page_add_new_anon_rmap(page, vma, haddr); |
752 | mem_cgroup_commit_charge(page, memcg, false); | ||
753 | lru_cache_add_active_or_unevictable(page, vma); | ||
745 | pgtable_trans_huge_deposit(mm, pmd, pgtable); | 754 | pgtable_trans_huge_deposit(mm, pmd, pgtable); |
746 | set_pmd_at(mm, haddr, pmd, entry); | 755 | set_pmd_at(mm, haddr, pmd, entry); |
747 | add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); | 756 | add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); |
@@ -827,13 +836,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
827 | count_vm_event(THP_FAULT_FALLBACK); | 836 | count_vm_event(THP_FAULT_FALLBACK); |
828 | return VM_FAULT_FALLBACK; | 837 | return VM_FAULT_FALLBACK; |
829 | } | 838 | } |
830 | if (unlikely(mem_cgroup_charge_anon(page, mm, GFP_TRANSHUGE))) { | ||
831 | put_page(page); | ||
832 | count_vm_event(THP_FAULT_FALLBACK); | ||
833 | return VM_FAULT_FALLBACK; | ||
834 | } | ||
835 | if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { | 839 | if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) { |
836 | mem_cgroup_uncharge_page(page); | ||
837 | put_page(page); | 840 | put_page(page); |
838 | count_vm_event(THP_FAULT_FALLBACK); | 841 | count_vm_event(THP_FAULT_FALLBACK); |
839 | return VM_FAULT_FALLBACK; | 842 | return VM_FAULT_FALLBACK; |
@@ -979,6 +982,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, | |||
979 | struct page *page, | 982 | struct page *page, |
980 | unsigned long haddr) | 983 | unsigned long haddr) |
981 | { | 984 | { |
985 | struct mem_cgroup *memcg; | ||
982 | spinlock_t *ptl; | 986 | spinlock_t *ptl; |
983 | pgtable_t pgtable; | 987 | pgtable_t pgtable; |
984 | pmd_t _pmd; | 988 | pmd_t _pmd; |
@@ -999,20 +1003,21 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, | |||
999 | __GFP_OTHER_NODE, | 1003 | __GFP_OTHER_NODE, |
1000 | vma, address, page_to_nid(page)); | 1004 | vma, address, page_to_nid(page)); |
1001 | if (unlikely(!pages[i] || | 1005 | if (unlikely(!pages[i] || |
1002 | mem_cgroup_charge_anon(pages[i], mm, | 1006 | mem_cgroup_try_charge(pages[i], mm, GFP_KERNEL, |
1003 | GFP_KERNEL))) { | 1007 | &memcg))) { |
1004 | if (pages[i]) | 1008 | if (pages[i]) |
1005 | put_page(pages[i]); | 1009 | put_page(pages[i]); |
1006 | mem_cgroup_uncharge_start(); | ||
1007 | while (--i >= 0) { | 1010 | while (--i >= 0) { |
1008 | mem_cgroup_uncharge_page(pages[i]); | 1011 | memcg = (void *)page_private(pages[i]); |
1012 | set_page_private(pages[i], 0); | ||
1013 | mem_cgroup_cancel_charge(pages[i], memcg); | ||
1009 | put_page(pages[i]); | 1014 | put_page(pages[i]); |
1010 | } | 1015 | } |
1011 | mem_cgroup_uncharge_end(); | ||
1012 | kfree(pages); | 1016 | kfree(pages); |
1013 | ret |= VM_FAULT_OOM; | 1017 | ret |= VM_FAULT_OOM; |
1014 | goto out; | 1018 | goto out; |
1015 | } | 1019 | } |
1020 | set_page_private(pages[i], (unsigned long)memcg); | ||
1016 | } | 1021 | } |
1017 | 1022 | ||
1018 | for (i = 0; i < HPAGE_PMD_NR; i++) { | 1023 | for (i = 0; i < HPAGE_PMD_NR; i++) { |
@@ -1041,7 +1046,11 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, | |||
1041 | pte_t *pte, entry; | 1046 | pte_t *pte, entry; |
1042 | entry = mk_pte(pages[i], vma->vm_page_prot); | 1047 | entry = mk_pte(pages[i], vma->vm_page_prot); |
1043 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); | 1048 | entry = maybe_mkwrite(pte_mkdirty(entry), vma); |
1049 | memcg = (void *)page_private(pages[i]); | ||
1050 | set_page_private(pages[i], 0); | ||
1044 | page_add_new_anon_rmap(pages[i], vma, haddr); | 1051 | page_add_new_anon_rmap(pages[i], vma, haddr); |
1052 | mem_cgroup_commit_charge(pages[i], memcg, false); | ||
1053 | lru_cache_add_active_or_unevictable(pages[i], vma); | ||
1045 | pte = pte_offset_map(&_pmd, haddr); | 1054 | pte = pte_offset_map(&_pmd, haddr); |
1046 | VM_BUG_ON(!pte_none(*pte)); | 1055 | VM_BUG_ON(!pte_none(*pte)); |
1047 | set_pte_at(mm, haddr, pte, entry); | 1056 | set_pte_at(mm, haddr, pte, entry); |
@@ -1065,12 +1074,12 @@ out: | |||
1065 | out_free_pages: | 1074 | out_free_pages: |
1066 | spin_unlock(ptl); | 1075 | spin_unlock(ptl); |
1067 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | 1076 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); |
1068 | mem_cgroup_uncharge_start(); | ||
1069 | for (i = 0; i < HPAGE_PMD_NR; i++) { | 1077 | for (i = 0; i < HPAGE_PMD_NR; i++) { |
1070 | mem_cgroup_uncharge_page(pages[i]); | 1078 | memcg = (void *)page_private(pages[i]); |
1079 | set_page_private(pages[i], 0); | ||
1080 | mem_cgroup_cancel_charge(pages[i], memcg); | ||
1071 | put_page(pages[i]); | 1081 | put_page(pages[i]); |
1072 | } | 1082 | } |
1073 | mem_cgroup_uncharge_end(); | ||
1074 | kfree(pages); | 1083 | kfree(pages); |
1075 | goto out; | 1084 | goto out; |
1076 | } | 1085 | } |
@@ -1081,6 +1090,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1081 | spinlock_t *ptl; | 1090 | spinlock_t *ptl; |
1082 | int ret = 0; | 1091 | int ret = 0; |
1083 | struct page *page = NULL, *new_page; | 1092 | struct page *page = NULL, *new_page; |
1093 | struct mem_cgroup *memcg; | ||
1084 | unsigned long haddr; | 1094 | unsigned long haddr; |
1085 | unsigned long mmun_start; /* For mmu_notifiers */ | 1095 | unsigned long mmun_start; /* For mmu_notifiers */ |
1086 | unsigned long mmun_end; /* For mmu_notifiers */ | 1096 | unsigned long mmun_end; /* For mmu_notifiers */ |
@@ -1132,7 +1142,8 @@ alloc: | |||
1132 | goto out; | 1142 | goto out; |
1133 | } | 1143 | } |
1134 | 1144 | ||
1135 | if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) { | 1145 | if (unlikely(mem_cgroup_try_charge(new_page, mm, |
1146 | GFP_TRANSHUGE, &memcg))) { | ||
1136 | put_page(new_page); | 1147 | put_page(new_page); |
1137 | if (page) { | 1148 | if (page) { |
1138 | split_huge_page(page); | 1149 | split_huge_page(page); |
@@ -1161,7 +1172,7 @@ alloc: | |||
1161 | put_user_huge_page(page); | 1172 | put_user_huge_page(page); |
1162 | if (unlikely(!pmd_same(*pmd, orig_pmd))) { | 1173 | if (unlikely(!pmd_same(*pmd, orig_pmd))) { |
1163 | spin_unlock(ptl); | 1174 | spin_unlock(ptl); |
1164 | mem_cgroup_uncharge_page(new_page); | 1175 | mem_cgroup_cancel_charge(new_page, memcg); |
1165 | put_page(new_page); | 1176 | put_page(new_page); |
1166 | goto out_mn; | 1177 | goto out_mn; |
1167 | } else { | 1178 | } else { |
@@ -1170,6 +1181,8 @@ alloc: | |||
1170 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); | 1181 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); |
1171 | pmdp_clear_flush(vma, haddr, pmd); | 1182 | pmdp_clear_flush(vma, haddr, pmd); |
1172 | page_add_new_anon_rmap(new_page, vma, haddr); | 1183 | page_add_new_anon_rmap(new_page, vma, haddr); |
1184 | mem_cgroup_commit_charge(new_page, memcg, false); | ||
1185 | lru_cache_add_active_or_unevictable(new_page, vma); | ||
1173 | set_pmd_at(mm, haddr, pmd, entry); | 1186 | set_pmd_at(mm, haddr, pmd, entry); |
1174 | update_mmu_cache_pmd(vma, address, pmd); | 1187 | update_mmu_cache_pmd(vma, address, pmd); |
1175 | if (!page) { | 1188 | if (!page) { |
@@ -2413,6 +2426,7 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2413 | spinlock_t *pmd_ptl, *pte_ptl; | 2426 | spinlock_t *pmd_ptl, *pte_ptl; |
2414 | int isolated; | 2427 | int isolated; |
2415 | unsigned long hstart, hend; | 2428 | unsigned long hstart, hend; |
2429 | struct mem_cgroup *memcg; | ||
2416 | unsigned long mmun_start; /* For mmu_notifiers */ | 2430 | unsigned long mmun_start; /* For mmu_notifiers */ |
2417 | unsigned long mmun_end; /* For mmu_notifiers */ | 2431 | unsigned long mmun_end; /* For mmu_notifiers */ |
2418 | 2432 | ||
@@ -2423,7 +2437,8 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2423 | if (!new_page) | 2437 | if (!new_page) |
2424 | return; | 2438 | return; |
2425 | 2439 | ||
2426 | if (unlikely(mem_cgroup_charge_anon(new_page, mm, GFP_TRANSHUGE))) | 2440 | if (unlikely(mem_cgroup_try_charge(new_page, mm, |
2441 | GFP_TRANSHUGE, &memcg))) | ||
2427 | return; | 2442 | return; |
2428 | 2443 | ||
2429 | /* | 2444 | /* |
@@ -2510,6 +2525,8 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2510 | spin_lock(pmd_ptl); | 2525 | spin_lock(pmd_ptl); |
2511 | BUG_ON(!pmd_none(*pmd)); | 2526 | BUG_ON(!pmd_none(*pmd)); |
2512 | page_add_new_anon_rmap(new_page, vma, address); | 2527 | page_add_new_anon_rmap(new_page, vma, address); |
2528 | mem_cgroup_commit_charge(new_page, memcg, false); | ||
2529 | lru_cache_add_active_or_unevictable(new_page, vma); | ||
2513 | pgtable_trans_huge_deposit(mm, pmd, pgtable); | 2530 | pgtable_trans_huge_deposit(mm, pmd, pgtable); |
2514 | set_pmd_at(mm, address, pmd, _pmd); | 2531 | set_pmd_at(mm, address, pmd, _pmd); |
2515 | update_mmu_cache_pmd(vma, address, pmd); | 2532 | update_mmu_cache_pmd(vma, address, pmd); |
@@ -2523,7 +2540,7 @@ out_up_write: | |||
2523 | return; | 2540 | return; |
2524 | 2541 | ||
2525 | out: | 2542 | out: |
2526 | mem_cgroup_uncharge_page(new_page); | 2543 | mem_cgroup_cancel_charge(new_page, memcg); |
2527 | goto out_up_write; | 2544 | goto out_up_write; |
2528 | } | 2545 | } |
2529 | 2546 | ||
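One detail in the do_huge_pmd_wp_page_fallback() hunks above is worth calling out: because the commit is deferred until the small pages are actually mapped, the memcg returned by each try_charge has to be remembered per page in the meantime, and the patch parks it in page_private. The pattern, reduced to its essentials:

	/* after a successful try_charge of pages[i]: */
	set_page_private(pages[i], (unsigned long)memcg);

	/* later, when mapping (or unwinding) pages[i]: */
	memcg = (void *)page_private(pages[i]);
	set_page_private(pages[i], 0);
	mem_cgroup_commit_charge(pages[i], memcg, false);	/* or mem_cgroup_cancel_charge() */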
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 90dc501eaf3f..1cbe1e54ff5f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2551,17 +2551,8 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb, | |||
2551 | return NOTIFY_OK; | 2551 | return NOTIFY_OK; |
2552 | } | 2552 | } |
2553 | 2553 | ||
2554 | /** | 2554 | static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, |
2555 | * mem_cgroup_try_charge - try charging a memcg | 2555 | unsigned int nr_pages) |
2556 | * @memcg: memcg to charge | ||
2557 | * @nr_pages: number of pages to charge | ||
2558 | * | ||
2559 | * Returns 0 if @memcg was charged successfully, -EINTR if the charge | ||
2560 | * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed. | ||
2561 | */ | ||
2562 | static int mem_cgroup_try_charge(struct mem_cgroup *memcg, | ||
2563 | gfp_t gfp_mask, | ||
2564 | unsigned int nr_pages) | ||
2565 | { | 2556 | { |
2566 | unsigned int batch = max(CHARGE_BATCH, nr_pages); | 2557 | unsigned int batch = max(CHARGE_BATCH, nr_pages); |
2567 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 2558 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
@@ -2660,41 +2651,7 @@ done: | |||
2660 | return ret; | 2651 | return ret; |
2661 | } | 2652 | } |
2662 | 2653 | ||
2663 | /** | 2654 | static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) |
2664 | * mem_cgroup_try_charge_mm - try charging a mm | ||
2665 | * @mm: mm_struct to charge | ||
2666 | * @nr_pages: number of pages to charge | ||
2667 | * @oom: trigger OOM if reclaim fails | ||
2668 | * | ||
2669 | * Returns the charged mem_cgroup associated with the given mm_struct or | ||
2670 | * NULL the charge failed. | ||
2671 | */ | ||
2672 | static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm, | ||
2673 | gfp_t gfp_mask, | ||
2674 | unsigned int nr_pages) | ||
2675 | |||
2676 | { | ||
2677 | struct mem_cgroup *memcg; | ||
2678 | int ret; | ||
2679 | |||
2680 | memcg = get_mem_cgroup_from_mm(mm); | ||
2681 | ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages); | ||
2682 | css_put(&memcg->css); | ||
2683 | if (ret == -EINTR) | ||
2684 | memcg = root_mem_cgroup; | ||
2685 | else if (ret) | ||
2686 | memcg = NULL; | ||
2687 | |||
2688 | return memcg; | ||
2689 | } | ||
2690 | |||
2691 | /* | ||
2692 | * Somemtimes we have to undo a charge we got by try_charge(). | ||
2693 | * This function is for that and do uncharge, put css's refcnt. | ||
2694 | * gotten by try_charge(). | ||
2695 | */ | ||
2696 | static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, | ||
2697 | unsigned int nr_pages) | ||
2698 | { | 2655 | { |
2699 | unsigned long bytes = nr_pages * PAGE_SIZE; | 2656 | unsigned long bytes = nr_pages * PAGE_SIZE; |
2700 | 2657 | ||
@@ -2760,17 +2717,13 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) | |||
2760 | return memcg; | 2717 | return memcg; |
2761 | } | 2718 | } |
2762 | 2719 | ||
2763 | static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | 2720 | static void commit_charge(struct page *page, struct mem_cgroup *memcg, |
2764 | struct page *page, | 2721 | unsigned int nr_pages, bool anon, bool lrucare) |
2765 | unsigned int nr_pages, | ||
2766 | enum charge_type ctype, | ||
2767 | bool lrucare) | ||
2768 | { | 2722 | { |
2769 | struct page_cgroup *pc = lookup_page_cgroup(page); | 2723 | struct page_cgroup *pc = lookup_page_cgroup(page); |
2770 | struct zone *uninitialized_var(zone); | 2724 | struct zone *uninitialized_var(zone); |
2771 | struct lruvec *lruvec; | 2725 | struct lruvec *lruvec; |
2772 | bool was_on_lru = false; | 2726 | bool was_on_lru = false; |
2773 | bool anon; | ||
2774 | 2727 | ||
2775 | lock_page_cgroup(pc); | 2728 | lock_page_cgroup(pc); |
2776 | VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); | 2729 | VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); |
@@ -2807,11 +2760,6 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | |||
2807 | spin_unlock_irq(&zone->lru_lock); | 2760 | spin_unlock_irq(&zone->lru_lock); |
2808 | } | 2761 | } |
2809 | 2762 | ||
2810 | if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON) | ||
2811 | anon = true; | ||
2812 | else | ||
2813 | anon = false; | ||
2814 | |||
2815 | mem_cgroup_charge_statistics(memcg, page, anon, nr_pages); | 2763 | mem_cgroup_charge_statistics(memcg, page, anon, nr_pages); |
2816 | unlock_page_cgroup(pc); | 2764 | unlock_page_cgroup(pc); |
2817 | 2765 | ||
@@ -2882,21 +2830,21 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size) | |||
2882 | if (ret) | 2830 | if (ret) |
2883 | return ret; | 2831 | return ret; |
2884 | 2832 | ||
2885 | ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT); | 2833 | ret = try_charge(memcg, gfp, size >> PAGE_SHIFT); |
2886 | if (ret == -EINTR) { | 2834 | if (ret == -EINTR) { |
2887 | /* | 2835 | /* |
2888 | * mem_cgroup_try_charge() chosed to bypass to root due to | 2836 | * try_charge() chose to bypass to root due to OOM kill or |
2889 | * OOM kill or fatal signal. Since our only options are to | 2837 | * fatal signal. Since our only options are to either fail |
2890 | * either fail the allocation or charge it to this cgroup, do | 2838 | * the allocation or charge it to this cgroup, do it as a |
2891 | * it as a temporary condition. But we can't fail. From a | 2839 | * temporary condition. But we can't fail. From a kmem/slab |
2892 | * kmem/slab perspective, the cache has already been selected, | 2840 | * perspective, the cache has already been selected, by |
2893 | * by mem_cgroup_kmem_get_cache(), so it is too late to change | 2841 | * mem_cgroup_kmem_get_cache(), so it is too late to change |
2894 | * our minds. | 2842 | * our minds. |
2895 | * | 2843 | * |
2896 | * This condition will only trigger if the task entered | 2844 | * This condition will only trigger if the task entered |
2897 | * memcg_charge_kmem in a sane state, but was OOM-killed during | 2845 | * memcg_charge_kmem in a sane state, but was OOM-killed |
2898 | * mem_cgroup_try_charge() above. Tasks that were already | 2846 | * during try_charge() above. Tasks that were already dying |
2899 | * dying when the allocation triggers should have been already | 2847 | * when the allocation triggers should have been already |
2900 | * directed to the root cgroup in memcontrol.h | 2848 | * directed to the root cgroup in memcontrol.h |
2901 | */ | 2849 | */ |
2902 | res_counter_charge_nofail(&memcg->res, size, &fail_res); | 2850 | res_counter_charge_nofail(&memcg->res, size, &fail_res); |
@@ -3618,164 +3566,6 @@ out: | |||
3618 | return ret; | 3566 | return ret; |
3619 | } | 3567 | } |
3620 | 3568 | ||
3621 | int mem_cgroup_charge_anon(struct page *page, | ||
3622 | struct mm_struct *mm, gfp_t gfp_mask) | ||
3623 | { | ||
3624 | unsigned int nr_pages = 1; | ||
3625 | struct mem_cgroup *memcg; | ||
3626 | |||
3627 | if (mem_cgroup_disabled()) | ||
3628 | return 0; | ||
3629 | |||
3630 | VM_BUG_ON_PAGE(page_mapped(page), page); | ||
3631 | VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); | ||
3632 | VM_BUG_ON(!mm); | ||
3633 | |||
3634 | if (PageTransHuge(page)) { | ||
3635 | nr_pages <<= compound_order(page); | ||
3636 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); | ||
3637 | } | ||
3638 | |||
3639 | memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages); | ||
3640 | if (!memcg) | ||
3641 | return -ENOMEM; | ||
3642 | __mem_cgroup_commit_charge(memcg, page, nr_pages, | ||
3643 | MEM_CGROUP_CHARGE_TYPE_ANON, false); | ||
3644 | return 0; | ||
3645 | } | ||
3646 | |||
3647 | /* | ||
3648 | * While swap-in, try_charge -> commit or cancel, the page is locked. | ||
3649 | * And when try_charge() successfully returns, one refcnt to memcg without | ||
3650 | * struct page_cgroup is acquired. This refcnt will be consumed by | ||
3651 | * "commit()" or removed by "cancel()" | ||
3652 | */ | ||
3653 | static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm, | ||
3654 | struct page *page, | ||
3655 | gfp_t mask, | ||
3656 | struct mem_cgroup **memcgp) | ||
3657 | { | ||
3658 | struct mem_cgroup *memcg = NULL; | ||
3659 | struct page_cgroup *pc; | ||
3660 | int ret; | ||
3661 | |||
3662 | pc = lookup_page_cgroup(page); | ||
3663 | /* | ||
3664 | * Every swap fault against a single page tries to charge the | ||
3665 | * page, bail as early as possible. shmem_unuse() encounters | ||
3666 | * already charged pages, too. The USED bit is protected by | ||
3667 | * the page lock, which serializes swap cache removal, which | ||
3668 | * in turn serializes uncharging. | ||
3669 | */ | ||
3670 | if (PageCgroupUsed(pc)) | ||
3671 | goto out; | ||
3672 | if (do_swap_account) | ||
3673 | memcg = try_get_mem_cgroup_from_page(page); | ||
3674 | if (!memcg) | ||
3675 | memcg = get_mem_cgroup_from_mm(mm); | ||
3676 | ret = mem_cgroup_try_charge(memcg, mask, 1); | ||
3677 | css_put(&memcg->css); | ||
3678 | if (ret == -EINTR) | ||
3679 | memcg = root_mem_cgroup; | ||
3680 | else if (ret) | ||
3681 | return ret; | ||
3682 | out: | ||
3683 | *memcgp = memcg; | ||
3684 | return 0; | ||
3685 | } | ||
3686 | |||
3687 | int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page, | ||
3688 | gfp_t gfp_mask, struct mem_cgroup **memcgp) | ||
3689 | { | ||
3690 | if (mem_cgroup_disabled()) { | ||
3691 | *memcgp = NULL; | ||
3692 | return 0; | ||
3693 | } | ||
3694 | /* | ||
3695 | * A racing thread's fault, or swapoff, may have already | ||
3696 | * updated the pte, and even removed page from swap cache: in | ||
3697 | * those cases unuse_pte()'s pte_same() test will fail; but | ||
3698 | * there's also a KSM case which does need to charge the page. | ||
3699 | */ | ||
3700 | if (!PageSwapCache(page)) { | ||
3701 | struct mem_cgroup *memcg; | ||
3702 | |||
3703 | memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1); | ||
3704 | if (!memcg) | ||
3705 | return -ENOMEM; | ||
3706 | *memcgp = memcg; | ||
3707 | return 0; | ||
3708 | } | ||
3709 | return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp); | ||
3710 | } | ||
3711 | |||
3712 | void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg) | ||
3713 | { | ||
3714 | if (mem_cgroup_disabled()) | ||
3715 | return; | ||
3716 | if (!memcg) | ||
3717 | return; | ||
3718 | __mem_cgroup_cancel_charge(memcg, 1); | ||
3719 | } | ||
3720 | |||
3721 | static void | ||
3722 | __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg, | ||
3723 | enum charge_type ctype) | ||
3724 | { | ||
3725 | if (mem_cgroup_disabled()) | ||
3726 | return; | ||
3727 | if (!memcg) | ||
3728 | return; | ||
3729 | |||
3730 | __mem_cgroup_commit_charge(memcg, page, 1, ctype, true); | ||
3731 | /* | ||
3732 | * Now swap is on-memory. This means this page may be | ||
3733 | * counted both as mem and swap....double count. | ||
3734 | * Fix it by uncharging from memsw. Basically, this SwapCache is stable | ||
3735 | * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page() | ||
3736 | * may call delete_from_swap_cache() before reach here. | ||
3737 | */ | ||
3738 | if (do_swap_account && PageSwapCache(page)) { | ||
3739 | swp_entry_t ent = {.val = page_private(page)}; | ||
3740 | mem_cgroup_uncharge_swap(ent); | ||
3741 | } | ||
3742 | } | ||
3743 | |||
3744 | void mem_cgroup_commit_charge_swapin(struct page *page, | ||
3745 | struct mem_cgroup *memcg) | ||
3746 | { | ||
3747 | __mem_cgroup_commit_charge_swapin(page, memcg, | ||
3748 | MEM_CGROUP_CHARGE_TYPE_ANON); | ||
3749 | } | ||
3750 | |||
3751 | int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, | ||
3752 | gfp_t gfp_mask) | ||
3753 | { | ||
3754 | enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; | ||
3755 | struct mem_cgroup *memcg; | ||
3756 | int ret; | ||
3757 | |||
3758 | if (mem_cgroup_disabled()) | ||
3759 | return 0; | ||
3760 | if (PageCompound(page)) | ||
3761 | return 0; | ||
3762 | |||
3763 | if (PageSwapCache(page)) { /* shmem */ | ||
3764 | ret = __mem_cgroup_try_charge_swapin(mm, page, | ||
3765 | gfp_mask, &memcg); | ||
3766 | if (ret) | ||
3767 | return ret; | ||
3768 | __mem_cgroup_commit_charge_swapin(page, memcg, type); | ||
3769 | return 0; | ||
3770 | } | ||
3771 | |||
3772 | memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1); | ||
3773 | if (!memcg) | ||
3774 | return -ENOMEM; | ||
3775 | __mem_cgroup_commit_charge(memcg, page, 1, type, false); | ||
3776 | return 0; | ||
3777 | } | ||
3778 | |||
3779 | static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, | 3569 | static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, |
3780 | unsigned int nr_pages, | 3570 | unsigned int nr_pages, |
3781 | const enum charge_type ctype) | 3571 | const enum charge_type ctype) |
@@ -4122,7 +3912,6 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, | |||
4122 | struct mem_cgroup *memcg = NULL; | 3912 | struct mem_cgroup *memcg = NULL; |
4123 | unsigned int nr_pages = 1; | 3913 | unsigned int nr_pages = 1; |
4124 | struct page_cgroup *pc; | 3914 | struct page_cgroup *pc; |
4125 | enum charge_type ctype; | ||
4126 | 3915 | ||
4127 | *memcgp = NULL; | 3916 | *memcgp = NULL; |
4128 | 3917 | ||
@@ -4184,16 +3973,12 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, | |||
4184 | * page. In the case new page is migrated but not remapped, new page's | 3973 | * page. In the case new page is migrated but not remapped, new page's |
4185 | * mapcount will be finally 0 and we call uncharge in end_migration(). | 3974 | * mapcount will be finally 0 and we call uncharge in end_migration(). |
4186 | */ | 3975 | */ |
4187 | if (PageAnon(page)) | ||
4188 | ctype = MEM_CGROUP_CHARGE_TYPE_ANON; | ||
4189 | else | ||
4190 | ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; | ||
4191 | /* | 3976 | /* |
4192 | * The page is committed to the memcg, but it's not actually | 3977 | * The page is committed to the memcg, but it's not actually |
4193 | * charged to the res_counter since we plan on replacing the | 3978 | * charged to the res_counter since we plan on replacing the |
4194 | * old one and only one page is going to be left afterwards. | 3979 | * old one and only one page is going to be left afterwards. |
4195 | */ | 3980 | */ |
4196 | __mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false); | 3981 | commit_charge(newpage, memcg, nr_pages, PageAnon(page), false); |
4197 | } | 3982 | } |
4198 | 3983 | ||
4199 | /* remove redundant charge if migration failed*/ | 3984 | /* remove redundant charge if migration failed*/ |
@@ -4252,7 +4037,6 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, | |||
4252 | { | 4037 | { |
4253 | struct mem_cgroup *memcg = NULL; | 4038 | struct mem_cgroup *memcg = NULL; |
4254 | struct page_cgroup *pc; | 4039 | struct page_cgroup *pc; |
4255 | enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE; | ||
4256 | 4040 | ||
4257 | if (mem_cgroup_disabled()) | 4041 | if (mem_cgroup_disabled()) |
4258 | return; | 4042 | return; |
@@ -4278,7 +4062,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage, | |||
4278 | * the newpage may be on LRU(or pagevec for LRU) already. We lock | 4062 | * the newpage may be on LRU(or pagevec for LRU) already. We lock |
4279 | * LRU while we overwrite pc->mem_cgroup. | 4063 | * LRU while we overwrite pc->mem_cgroup. |
4280 | */ | 4064 | */ |
4281 | __mem_cgroup_commit_charge(memcg, newpage, 1, type, true); | 4065 | commit_charge(newpage, memcg, 1, false, true); |
4282 | } | 4066 | } |
4283 | 4067 | ||
4284 | #ifdef CONFIG_DEBUG_VM | 4068 | #ifdef CONFIG_DEBUG_VM |
@@ -6319,20 +6103,19 @@ static int mem_cgroup_do_precharge(unsigned long count) | |||
6319 | int ret; | 6103 | int ret; |
6320 | 6104 | ||
6321 | /* Try a single bulk charge without reclaim first */ | 6105 | /* Try a single bulk charge without reclaim first */ |
6322 | ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); | 6106 | ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); |
6323 | if (!ret) { | 6107 | if (!ret) { |
6324 | mc.precharge += count; | 6108 | mc.precharge += count; |
6325 | return ret; | 6109 | return ret; |
6326 | } | 6110 | } |
6327 | if (ret == -EINTR) { | 6111 | if (ret == -EINTR) { |
6328 | __mem_cgroup_cancel_charge(root_mem_cgroup, count); | 6112 | cancel_charge(root_mem_cgroup, count); |
6329 | return ret; | 6113 | return ret; |
6330 | } | 6114 | } |
6331 | 6115 | ||
6332 | /* Try charges one by one with reclaim */ | 6116 | /* Try charges one by one with reclaim */ |
6333 | while (count--) { | 6117 | while (count--) { |
6334 | ret = mem_cgroup_try_charge(mc.to, | 6118 | ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1); |
6335 | GFP_KERNEL & ~__GFP_NORETRY, 1); | ||
6336 | /* | 6119 | /* |
6337 | * In case of failure, any residual charges against | 6120 | * In case of failure, any residual charges against |
6338 | * mc.to will be dropped by mem_cgroup_clear_mc() | 6121 | * mc.to will be dropped by mem_cgroup_clear_mc() |
@@ -6340,7 +6123,7 @@ static int mem_cgroup_do_precharge(unsigned long count) | |||
6340 | * bypassed to root right away or they'll be lost. | 6123 | * bypassed to root right away or they'll be lost. |
6341 | */ | 6124 | */ |
6342 | if (ret == -EINTR) | 6125 | if (ret == -EINTR) |
6343 | __mem_cgroup_cancel_charge(root_mem_cgroup, 1); | 6126 | cancel_charge(root_mem_cgroup, 1); |
6344 | if (ret) | 6127 | if (ret) |
6345 | return ret; | 6128 | return ret; |
6346 | mc.precharge++; | 6129 | mc.precharge++; |
@@ -6609,7 +6392,7 @@ static void __mem_cgroup_clear_mc(void) | |||
6609 | 6392 | ||
6610 | /* we must uncharge all the leftover precharges from mc.to */ | 6393 | /* we must uncharge all the leftover precharges from mc.to */ |
6611 | if (mc.precharge) { | 6394 | if (mc.precharge) { |
6612 | __mem_cgroup_cancel_charge(mc.to, mc.precharge); | 6395 | cancel_charge(mc.to, mc.precharge); |
6613 | mc.precharge = 0; | 6396 | mc.precharge = 0; |
6614 | } | 6397 | } |
6615 | /* | 6398 | /* |
@@ -6617,7 +6400,7 @@ static void __mem_cgroup_clear_mc(void) | |||
6617 | * we must uncharge here. | 6400 | * we must uncharge here. |
6618 | */ | 6401 | */ |
6619 | if (mc.moved_charge) { | 6402 | if (mc.moved_charge) { |
6620 | __mem_cgroup_cancel_charge(mc.from, mc.moved_charge); | 6403 | cancel_charge(mc.from, mc.moved_charge); |
6621 | mc.moved_charge = 0; | 6404 | mc.moved_charge = 0; |
6622 | } | 6405 | } |
6623 | /* we must fixup refcnts and charges */ | 6406 | /* we must fixup refcnts and charges */ |
@@ -6946,6 +6729,150 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
+/**
+ * mem_cgroup_try_charge - try charging a page
+ * @page: page to charge
+ * @mm: mm context of the victim
+ * @gfp_mask: reclaim mode
+ * @memcgp: charged memcg return
+ *
+ * Try to charge @page to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp_mask if necessary.
+ *
+ * Returns 0 on success, with *@memcgp pointing to the charged memcg.
+ * Otherwise, an error code is returned.
+ *
+ * After page->mapping has been set up, the caller must finalize the
+ * charge with mem_cgroup_commit_charge().  Or abort the transaction
+ * with mem_cgroup_cancel_charge() in case page instantiation fails.
+ */
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp)
+{
+	struct mem_cgroup *memcg = NULL;
+	unsigned int nr_pages = 1;
+	int ret = 0;
+
+	if (mem_cgroup_disabled())
+		goto out;
+
+	if (PageSwapCache(page)) {
+		struct page_cgroup *pc = lookup_page_cgroup(page);
+		/*
+		 * Every swap fault against a single page tries to charge the
+		 * page, bail as early as possible.  shmem_unuse() encounters
+		 * already charged pages, too.  The USED bit is protected by
+		 * the page lock, which serializes swap cache removal, which
+		 * in turn serializes uncharging.
+		 */
+		if (PageCgroupUsed(pc))
+			goto out;
+	}
+
+	if (PageTransHuge(page)) {
+		nr_pages <<= compound_order(page);
+		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+	}
+
+	if (do_swap_account && PageSwapCache(page))
+		memcg = try_get_mem_cgroup_from_page(page);
+	if (!memcg)
+		memcg = get_mem_cgroup_from_mm(mm);
+
+	ret = try_charge(memcg, gfp_mask, nr_pages);
+
+	css_put(&memcg->css);
+
+	if (ret == -EINTR) {
+		memcg = root_mem_cgroup;
+		ret = 0;
+	}
+out:
+	*memcgp = memcg;
+	return ret;
+}
+
+/**
+ * mem_cgroup_commit_charge - commit a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ * @lrucare: page might be on LRU already
+ *
+ * Finalize a charge transaction started by mem_cgroup_try_charge(),
+ * after page->mapping has been set up.  This must happen atomically
+ * as part of the page instantiation, i.e. under the page table lock
+ * for anonymous pages, under the page lock for page and swap cache.
+ *
+ * In addition, the page must not be on the LRU during the commit, to
+ * prevent racing with task migration.  If it might be, use @lrucare.
+ *
+ * Use mem_cgroup_cancel_charge() to cancel the transaction instead.
+ */
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+			      bool lrucare)
+{
+	unsigned int nr_pages = 1;
+
+	VM_BUG_ON_PAGE(!page->mapping, page);
+	VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page);
+
+	if (mem_cgroup_disabled())
+		return;
+	/*
+	 * Swap faults will attempt to charge the same page multiple
+	 * times.  But reuse_swap_page() might have removed the page
+	 * from swapcache already, so we can't check PageSwapCache().
+	 */
+	if (!memcg)
+		return;
+
+	if (PageTransHuge(page)) {
+		nr_pages <<= compound_order(page);
+		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+	}
+
+	commit_charge(page, memcg, nr_pages, PageAnon(page), lrucare);
+
+	if (do_swap_account && PageSwapCache(page)) {
+		swp_entry_t entry = { .val = page_private(page) };
+		/*
+		 * The swap entry might not get freed for a long time,
+		 * let's not wait for it.  The page already received a
+		 * memory+swap charge, drop the swap entry duplicate.
+		 */
+		mem_cgroup_uncharge_swap(entry);
+	}
+}
+
+/**
+ * mem_cgroup_cancel_charge - cancel a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ *
+ * Cancel a charge transaction started by mem_cgroup_try_charge().
+ */
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
+{
+	unsigned int nr_pages = 1;
+
+	if (mem_cgroup_disabled())
+		return;
+	/*
+	 * Swap faults will attempt to charge the same page multiple
+	 * times.  But reuse_swap_page() might have removed the page
+	 * from swapcache already, so we can't check PageSwapCache().
+	 */
+	if (!memcg)
+		return;
+
+	if (PageTransHuge(page)) {
+		nr_pages <<= compound_order(page);
+		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+	}
+
+	cancel_charge(memcg, nr_pages);
+}
+
 /*
  * subsys_initcall() for memory controller.
  *
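For swap-in the commit step has two extra twists, both visible in the do_swap_page() hunk below: a page that is still in the swap cache may already be on the LRU, so it is committed with lrucare=true, and a page that received a memory+swap charge drops its swap entry duplicate at commit time via mem_cgroup_uncharge_swap(). Roughly:

	if (page == swapcache) {
		do_page_add_anon_rmap(page, vma, address, exclusive);
		mem_cgroup_commit_charge(page, memcg, true);	/* page may be on LRU */
	} else {	/* ksm created a completely new copy */
		page_add_new_anon_rmap(page, vma, address);
		mem_cgroup_commit_charge(page, memcg, false);
		lru_cache_add_active_or_unevictable(page, vma);
	}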
diff --git a/mm/memory.c b/mm/memory.c
index 5c55270729f7..6d7648773dc4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2049,6 +2049,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2049 | struct page *dirty_page = NULL; | 2049 | struct page *dirty_page = NULL; |
2050 | unsigned long mmun_start = 0; /* For mmu_notifiers */ | 2050 | unsigned long mmun_start = 0; /* For mmu_notifiers */ |
2051 | unsigned long mmun_end = 0; /* For mmu_notifiers */ | 2051 | unsigned long mmun_end = 0; /* For mmu_notifiers */ |
2052 | struct mem_cgroup *memcg; | ||
2052 | 2053 | ||
2053 | old_page = vm_normal_page(vma, address, orig_pte); | 2054 | old_page = vm_normal_page(vma, address, orig_pte); |
2054 | if (!old_page) { | 2055 | if (!old_page) { |
@@ -2204,7 +2205,7 @@ gotten: | |||
2204 | } | 2205 | } |
2205 | __SetPageUptodate(new_page); | 2206 | __SetPageUptodate(new_page); |
2206 | 2207 | ||
2207 | if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) | 2208 | if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) |
2208 | goto oom_free_new; | 2209 | goto oom_free_new; |
2209 | 2210 | ||
2210 | mmun_start = address & PAGE_MASK; | 2211 | mmun_start = address & PAGE_MASK; |
@@ -2234,6 +2235,8 @@ gotten: | |||
2234 | */ | 2235 | */ |
2235 | ptep_clear_flush(vma, address, page_table); | 2236 | ptep_clear_flush(vma, address, page_table); |
2236 | page_add_new_anon_rmap(new_page, vma, address); | 2237 | page_add_new_anon_rmap(new_page, vma, address); |
2238 | mem_cgroup_commit_charge(new_page, memcg, false); | ||
2239 | lru_cache_add_active_or_unevictable(new_page, vma); | ||
2237 | /* | 2240 | /* |
2238 | * We call the notify macro here because, when using secondary | 2241 | * We call the notify macro here because, when using secondary |
2239 | * mmu page tables (such as kvm shadow page tables), we want the | 2242 | * mmu page tables (such as kvm shadow page tables), we want the |
@@ -2271,7 +2274,7 @@ gotten: | |||
2271 | new_page = old_page; | 2274 | new_page = old_page; |
2272 | ret |= VM_FAULT_WRITE; | 2275 | ret |= VM_FAULT_WRITE; |
2273 | } else | 2276 | } else |
2274 | mem_cgroup_uncharge_page(new_page); | 2277 | mem_cgroup_cancel_charge(new_page, memcg); |
2275 | 2278 | ||
2276 | if (new_page) | 2279 | if (new_page) |
2277 | page_cache_release(new_page); | 2280 | page_cache_release(new_page); |
@@ -2410,10 +2413,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2410 | { | 2413 | { |
2411 | spinlock_t *ptl; | 2414 | spinlock_t *ptl; |
2412 | struct page *page, *swapcache; | 2415 | struct page *page, *swapcache; |
2416 | struct mem_cgroup *memcg; | ||
2413 | swp_entry_t entry; | 2417 | swp_entry_t entry; |
2414 | pte_t pte; | 2418 | pte_t pte; |
2415 | int locked; | 2419 | int locked; |
2416 | struct mem_cgroup *ptr; | ||
2417 | int exclusive = 0; | 2420 | int exclusive = 0; |
2418 | int ret = 0; | 2421 | int ret = 0; |
2419 | 2422 | ||
@@ -2489,7 +2492,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2489 | goto out_page; | 2492 | goto out_page; |
2490 | } | 2493 | } |
2491 | 2494 | ||
2492 | if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { | 2495 | if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) { |
2493 | ret = VM_FAULT_OOM; | 2496 | ret = VM_FAULT_OOM; |
2494 | goto out_page; | 2497 | goto out_page; |
2495 | } | 2498 | } |
@@ -2514,10 +2517,6 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2514 | * while the page is counted on swap but not yet in mapcount i.e. | 2517 | * while the page is counted on swap but not yet in mapcount i.e. |
2515 | * before page_add_anon_rmap() and swap_free(); try_to_free_swap() | 2518 | * before page_add_anon_rmap() and swap_free(); try_to_free_swap() |
2516 | * must be called after the swap_free(), or it will never succeed. | 2519 | * must be called after the swap_free(), or it will never succeed. |
2517 | * Because delete_from_swap_page() may be called by reuse_swap_page(), | ||
2518 | * mem_cgroup_commit_charge_swapin() may not be able to find swp_entry | ||
2519 | * in page->private. In this case, a record in swap_cgroup is silently | ||
2520 | * discarded at swap_free(). | ||
2521 | */ | 2520 | */ |
2522 | 2521 | ||
2523 | inc_mm_counter_fast(mm, MM_ANONPAGES); | 2522 | inc_mm_counter_fast(mm, MM_ANONPAGES); |
@@ -2533,12 +2532,14 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2533 | if (pte_swp_soft_dirty(orig_pte)) | 2532 | if (pte_swp_soft_dirty(orig_pte)) |
2534 | pte = pte_mksoft_dirty(pte); | 2533 | pte = pte_mksoft_dirty(pte); |
2535 | set_pte_at(mm, address, page_table, pte); | 2534 | set_pte_at(mm, address, page_table, pte); |
2536 | if (page == swapcache) | 2535 | if (page == swapcache) { |
2537 | do_page_add_anon_rmap(page, vma, address, exclusive); | 2536 | do_page_add_anon_rmap(page, vma, address, exclusive); |
2538 | else /* ksm created a completely new copy */ | 2537 | mem_cgroup_commit_charge(page, memcg, true); |
2538 | } else { /* ksm created a completely new copy */ | ||
2539 | page_add_new_anon_rmap(page, vma, address); | 2539 | page_add_new_anon_rmap(page, vma, address); |
2540 | /* It's better to call commit-charge after rmap is established */ | 2540 | mem_cgroup_commit_charge(page, memcg, false); |
2541 | mem_cgroup_commit_charge_swapin(page, ptr); | 2541 | lru_cache_add_active_or_unevictable(page, vma); |
2542 | } | ||
2542 | 2543 | ||
2543 | swap_free(entry); | 2544 | swap_free(entry); |
2544 | if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) | 2545 | if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) |
@@ -2571,7 +2572,7 @@ unlock: | |||
2571 | out: | 2572 | out: |
2572 | return ret; | 2573 | return ret; |
2573 | out_nomap: | 2574 | out_nomap: |
2574 | mem_cgroup_cancel_charge_swapin(ptr); | 2575 | mem_cgroup_cancel_charge(page, memcg); |
2575 | pte_unmap_unlock(page_table, ptl); | 2576 | pte_unmap_unlock(page_table, ptl); |
2576 | out_page: | 2577 | out_page: |
2577 | unlock_page(page); | 2578 | unlock_page(page); |
@@ -2627,6 +2628,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2627 | unsigned long address, pte_t *page_table, pmd_t *pmd, | 2628 | unsigned long address, pte_t *page_table, pmd_t *pmd, |
2628 | unsigned int flags) | 2629 | unsigned int flags) |
2629 | { | 2630 | { |
2631 | struct mem_cgroup *memcg; | ||
2630 | struct page *page; | 2632 | struct page *page; |
2631 | spinlock_t *ptl; | 2633 | spinlock_t *ptl; |
2632 | pte_t entry; | 2634 | pte_t entry; |
@@ -2660,7 +2662,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2660 | */ | 2662 | */ |
2661 | __SetPageUptodate(page); | 2663 | __SetPageUptodate(page); |
2662 | 2664 | ||
2663 | if (mem_cgroup_charge_anon(page, mm, GFP_KERNEL)) | 2665 | if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg)) |
2664 | goto oom_free_page; | 2666 | goto oom_free_page; |
2665 | 2667 | ||
2666 | entry = mk_pte(page, vma->vm_page_prot); | 2668 | entry = mk_pte(page, vma->vm_page_prot); |
@@ -2673,6 +2675,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2673 | 2675 | ||
2674 | inc_mm_counter_fast(mm, MM_ANONPAGES); | 2676 | inc_mm_counter_fast(mm, MM_ANONPAGES); |
2675 | page_add_new_anon_rmap(page, vma, address); | 2677 | page_add_new_anon_rmap(page, vma, address); |
2678 | mem_cgroup_commit_charge(page, memcg, false); | ||
2679 | lru_cache_add_active_or_unevictable(page, vma); | ||
2676 | setpte: | 2680 | setpte: |
2677 | set_pte_at(mm, address, page_table, entry); | 2681 | set_pte_at(mm, address, page_table, entry); |
2678 | 2682 | ||
@@ -2682,7 +2686,7 @@ unlock: | |||
2682 | pte_unmap_unlock(page_table, ptl); | 2686 | pte_unmap_unlock(page_table, ptl); |
2683 | return 0; | 2687 | return 0; |
2684 | release: | 2688 | release: |
2685 | mem_cgroup_uncharge_page(page); | 2689 | mem_cgroup_cancel_charge(page, memcg); |
2686 | page_cache_release(page); | 2690 | page_cache_release(page); |
2687 | goto unlock; | 2691 | goto unlock; |
2688 | oom_free_page: | 2692 | oom_free_page: |
@@ -2919,6 +2923,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2919 | pgoff_t pgoff, unsigned int flags, pte_t orig_pte) | 2923 | pgoff_t pgoff, unsigned int flags, pte_t orig_pte) |
2920 | { | 2924 | { |
2921 | struct page *fault_page, *new_page; | 2925 | struct page *fault_page, *new_page; |
2926 | struct mem_cgroup *memcg; | ||
2922 | spinlock_t *ptl; | 2927 | spinlock_t *ptl; |
2923 | pte_t *pte; | 2928 | pte_t *pte; |
2924 | int ret; | 2929 | int ret; |
@@ -2930,7 +2935,7 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2930 | if (!new_page) | 2935 | if (!new_page) |
2931 | return VM_FAULT_OOM; | 2936 | return VM_FAULT_OOM; |
2932 | 2937 | ||
2933 | if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) { | 2938 | if (mem_cgroup_try_charge(new_page, mm, GFP_KERNEL, &memcg)) { |
2934 | page_cache_release(new_page); | 2939 | page_cache_release(new_page); |
2935 | return VM_FAULT_OOM; | 2940 | return VM_FAULT_OOM; |
2936 | } | 2941 | } |
@@ -2950,12 +2955,14 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2950 | goto uncharge_out; | 2955 | goto uncharge_out; |
2951 | } | 2956 | } |
2952 | do_set_pte(vma, address, new_page, pte, true, true); | 2957 | do_set_pte(vma, address, new_page, pte, true, true); |
2958 | mem_cgroup_commit_charge(new_page, memcg, false); | ||
2959 | lru_cache_add_active_or_unevictable(new_page, vma); | ||
2953 | pte_unmap_unlock(pte, ptl); | 2960 | pte_unmap_unlock(pte, ptl); |
2954 | unlock_page(fault_page); | 2961 | unlock_page(fault_page); |
2955 | page_cache_release(fault_page); | 2962 | page_cache_release(fault_page); |
2956 | return ret; | 2963 | return ret; |
2957 | uncharge_out: | 2964 | uncharge_out: |
2958 | mem_cgroup_uncharge_page(new_page); | 2965 | mem_cgroup_cancel_charge(new_page, memcg); |
2959 | page_cache_release(new_page); | 2966 | page_cache_release(new_page); |
2960 | return ret; | 2967 | return ret; |
2961 | } | 2968 | } |
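
The mm/memory.c conversions above all follow the same charge-commit-cancel shape. A minimal sketch of that caller pattern, condensed from the hunks above; the function name and the race check are illustrative stand-ins, not part of the patch, while the memcg, rmap and LRU calls are the ones the converted fault handlers use:

static int example_charge_new_anon(struct page *page, struct mm_struct *mm,
				   struct vm_area_struct *vma,
				   unsigned long address)
{
	struct mem_cgroup *memcg;

	/* Reserve memory against the faulting task's memcg. */
	if (mem_cgroup_try_charge(page, mm, GFP_KERNEL, &memcg))
		return VM_FAULT_OOM;

	/* Hypothetical check standing in for pte_same() and friends. */
	if (example_lost_race(mm, address)) {
		/* Nothing committed yet: just give the reservation back. */
		mem_cgroup_cancel_charge(page, memcg);
		return 0;
	}

	/* Map the page, bind it to the memcg, then let reclaim see it. */
	page_add_new_anon_rmap(page, vma, address);
	mem_cgroup_commit_charge(page, memcg, false);
	lru_cache_add_active_or_unevictable(page, vma);
	return 0;
}
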
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -1032,25 +1032,6 @@ void page_add_new_anon_rmap(struct page *page, | |||
1032 | __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, | 1032 | __mod_zone_page_state(page_zone(page), NR_ANON_PAGES, |
1033 | hpage_nr_pages(page)); | 1033 | hpage_nr_pages(page)); |
1034 | __page_set_anon_rmap(page, vma, address, 1); | 1034 | __page_set_anon_rmap(page, vma, address, 1); |
1035 | |||
1036 | VM_BUG_ON_PAGE(PageLRU(page), page); | ||
1037 | if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) { | ||
1038 | SetPageActive(page); | ||
1039 | lru_cache_add(page); | ||
1040 | return; | ||
1041 | } | ||
1042 | |||
1043 | if (!TestSetPageMlocked(page)) { | ||
1044 | /* | ||
1045 | * We use the irq-unsafe __mod_zone_page_stat because this | ||
1046 | * counter is not modified from interrupt context, and the pte | ||
1047 | * lock is held(spinlock), which implies preemption disabled. | ||
1048 | */ | ||
1049 | __mod_zone_page_state(page_zone(page), NR_MLOCK, | ||
1050 | hpage_nr_pages(page)); | ||
1051 | count_vm_event(UNEVICTABLE_PGMLOCKED); | ||
1052 | } | ||
1053 | add_page_to_unevictable_list(page); | ||
1054 | } | 1035 | } |
1055 | 1036 | ||
1056 | /** | 1037 | /** |
diff --git a/mm/shmem.c b/mm/shmem.c index 302d1cf7ad07..1f1a8085538b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -621,7 +621,7 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, | |||
621 | radswap = swp_to_radix_entry(swap); | 621 | radswap = swp_to_radix_entry(swap); |
622 | index = radix_tree_locate_item(&mapping->page_tree, radswap); | 622 | index = radix_tree_locate_item(&mapping->page_tree, radswap); |
623 | if (index == -1) | 623 | if (index == -1) |
624 | return 0; | 624 | return -EAGAIN; /* tell shmem_unuse we found nothing */ |
625 | 625 | ||
626 | /* | 626 | /* |
627 | * Move _head_ to start search for next from here. | 627 | * Move _head_ to start search for next from here. |
@@ -680,7 +680,6 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, | |||
680 | spin_unlock(&info->lock); | 680 | spin_unlock(&info->lock); |
681 | swap_free(swap); | 681 | swap_free(swap); |
682 | } | 682 | } |
683 | error = 1; /* not an error, but entry was found */ | ||
684 | } | 683 | } |
685 | return error; | 684 | return error; |
686 | } | 685 | } |
@@ -692,7 +691,7 @@ int shmem_unuse(swp_entry_t swap, struct page *page) | |||
692 | { | 691 | { |
693 | struct list_head *this, *next; | 692 | struct list_head *this, *next; |
694 | struct shmem_inode_info *info; | 693 | struct shmem_inode_info *info; |
695 | int found = 0; | 694 | struct mem_cgroup *memcg; |
696 | int error = 0; | 695 | int error = 0; |
697 | 696 | ||
698 | /* | 697 | /* |
@@ -707,26 +706,32 @@ int shmem_unuse(swp_entry_t swap, struct page *page) | |||
707 | * the shmem_swaplist_mutex which might hold up shmem_writepage(). | 706 | * the shmem_swaplist_mutex which might hold up shmem_writepage(). |
708 | * Charged back to the user (not to caller) when swap account is used. | 707 | * Charged back to the user (not to caller) when swap account is used. |
709 | */ | 708 | */ |
710 | error = mem_cgroup_charge_file(page, current->mm, GFP_KERNEL); | 709 | error = mem_cgroup_try_charge(page, current->mm, GFP_KERNEL, &memcg); |
711 | if (error) | 710 | if (error) |
712 | goto out; | 711 | goto out; |
713 | /* No radix_tree_preload: swap entry keeps a place for page in tree */ | 712 | /* No radix_tree_preload: swap entry keeps a place for page in tree */ |
713 | error = -EAGAIN; | ||
714 | 714 | ||
715 | mutex_lock(&shmem_swaplist_mutex); | 715 | mutex_lock(&shmem_swaplist_mutex); |
716 | list_for_each_safe(this, next, &shmem_swaplist) { | 716 | list_for_each_safe(this, next, &shmem_swaplist) { |
717 | info = list_entry(this, struct shmem_inode_info, swaplist); | 717 | info = list_entry(this, struct shmem_inode_info, swaplist); |
718 | if (info->swapped) | 718 | if (info->swapped) |
719 | found = shmem_unuse_inode(info, swap, &page); | 719 | error = shmem_unuse_inode(info, swap, &page); |
720 | else | 720 | else |
721 | list_del_init(&info->swaplist); | 721 | list_del_init(&info->swaplist); |
722 | cond_resched(); | 722 | cond_resched(); |
723 | if (found) | 723 | if (error != -EAGAIN) |
724 | break; | 724 | break; |
725 | /* found nothing in this: move on to search the next */ | ||
725 | } | 726 | } |
726 | mutex_unlock(&shmem_swaplist_mutex); | 727 | mutex_unlock(&shmem_swaplist_mutex); |
727 | 728 | ||
728 | if (found < 0) | 729 | if (error) { |
729 | error = found; | 730 | if (error != -ENOMEM) |
731 | error = 0; | ||
732 | mem_cgroup_cancel_charge(page, memcg); | ||
733 | } else | ||
734 | mem_cgroup_commit_charge(page, memcg, true); | ||
730 | out: | 735 | out: |
731 | unlock_page(page); | 736 | unlock_page(page); |
732 | page_cache_release(page); | 737 | page_cache_release(page); |
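
The return-value plumbing above is terse. Read in one piece, shmem_unuse_inode() now uses -EAGAIN to mean "this inode does not hold the entry, keep walking", 0 to mean the entry was found and replaced, and a hard errno (notably -ENOMEM) to abort. The same walk again, with comments spelling out the convention; the comments are explanatory additions, the statements are those of the hunk above:

	error = -EAGAIN;				/* nothing found yet */
	mutex_lock(&shmem_swaplist_mutex);
	list_for_each_safe(this, next, &shmem_swaplist) {
		info = list_entry(this, struct shmem_inode_info, swaplist);
		if (info->swapped)
			error = shmem_unuse_inode(info, swap, &page);
		else
			list_del_init(&info->swaplist);
		cond_resched();
		if (error != -EAGAIN)
			break;		/* found it, or hit a hard error */
		/* -EAGAIN: not in this inode, search the next one */
	}
	mutex_unlock(&shmem_swaplist_mutex);

	if (error) {
		/* "not found" is not a failure as far as swapoff is concerned */
		if (error != -ENOMEM)
			error = 0;
		/* the page was never installed in the mapping: back out */
		mem_cgroup_cancel_charge(page, memcg);
	} else {
		/* the page now holds the data: make the charge permanent */
		mem_cgroup_commit_charge(page, memcg, true);
	}
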
@@ -1030,6 +1035,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, | |||
1030 | struct address_space *mapping = inode->i_mapping; | 1035 | struct address_space *mapping = inode->i_mapping; |
1031 | struct shmem_inode_info *info; | 1036 | struct shmem_inode_info *info; |
1032 | struct shmem_sb_info *sbinfo; | 1037 | struct shmem_sb_info *sbinfo; |
1038 | struct mem_cgroup *memcg; | ||
1033 | struct page *page; | 1039 | struct page *page; |
1034 | swp_entry_t swap; | 1040 | swp_entry_t swap; |
1035 | int error; | 1041 | int error; |
@@ -1108,8 +1114,7 @@ repeat: | |||
1108 | goto failed; | 1114 | goto failed; |
1109 | } | 1115 | } |
1110 | 1116 | ||
1111 | error = mem_cgroup_charge_file(page, current->mm, | 1117 | error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg); |
1112 | gfp & GFP_RECLAIM_MASK); | ||
1113 | if (!error) { | 1118 | if (!error) { |
1114 | error = shmem_add_to_page_cache(page, mapping, index, | 1119 | error = shmem_add_to_page_cache(page, mapping, index, |
1115 | swp_to_radix_entry(swap)); | 1120 | swp_to_radix_entry(swap)); |
@@ -1125,12 +1130,16 @@ repeat: | |||
1125 | * Reset swap.val? No, leave it so "failed" goes back to | 1130 | * Reset swap.val? No, leave it so "failed" goes back to |
1126 | * "repeat": reading a hole and writing should succeed. | 1131 | * "repeat": reading a hole and writing should succeed. |
1127 | */ | 1132 | */ |
1128 | if (error) | 1133 | if (error) { |
1134 | mem_cgroup_cancel_charge(page, memcg); | ||
1129 | delete_from_swap_cache(page); | 1135 | delete_from_swap_cache(page); |
1136 | } | ||
1130 | } | 1137 | } |
1131 | if (error) | 1138 | if (error) |
1132 | goto failed; | 1139 | goto failed; |
1133 | 1140 | ||
1141 | mem_cgroup_commit_charge(page, memcg, true); | ||
1142 | |||
1134 | spin_lock(&info->lock); | 1143 | spin_lock(&info->lock); |
1135 | info->swapped--; | 1144 | info->swapped--; |
1136 | shmem_recalc_inode(inode); | 1145 | shmem_recalc_inode(inode); |
@@ -1168,8 +1177,7 @@ repeat: | |||
1168 | if (sgp == SGP_WRITE) | 1177 | if (sgp == SGP_WRITE) |
1169 | __SetPageReferenced(page); | 1178 | __SetPageReferenced(page); |
1170 | 1179 | ||
1171 | error = mem_cgroup_charge_file(page, current->mm, | 1180 | error = mem_cgroup_try_charge(page, current->mm, gfp, &memcg); |
1172 | gfp & GFP_RECLAIM_MASK); | ||
1173 | if (error) | 1181 | if (error) |
1174 | goto decused; | 1182 | goto decused; |
1175 | error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); | 1183 | error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK); |
@@ -1179,9 +1187,10 @@ repeat: | |||
1179 | radix_tree_preload_end(); | 1187 | radix_tree_preload_end(); |
1180 | } | 1188 | } |
1181 | if (error) { | 1189 | if (error) { |
1182 | mem_cgroup_uncharge_cache_page(page); | 1190 | mem_cgroup_cancel_charge(page, memcg); |
1183 | goto decused; | 1191 | goto decused; |
1184 | } | 1192 | } |
1193 | mem_cgroup_commit_charge(page, memcg, false); | ||
1185 | lru_cache_add_anon(page); | 1194 | lru_cache_add_anon(page); |
1186 | 1195 | ||
1187 | spin_lock(&info->lock); | 1196 | spin_lock(&info->lock); |
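
One detail of shmem_getpage_gfp() worth spelling out: the swap-in branch commits the charge with "true", while the freshly allocated page is committed with "false" and only afterwards added to the LRU. Judging purely from the call sites in this patch (the same split appears in do_swap_page() and unuse_pte()), the flag appears to tell mem_cgroup_commit_charge() whether the page may already be sitting on an LRU list, as a swap cache page can be; that reading is an inference from the call sites, not something this hunk states. Schematically:

	/* page came in through the swap cache: it may already be on an LRU */
	mem_cgroup_commit_charge(page, memcg, true);

	/* brand-new page: not visible to the LRU until it is added below */
	mem_cgroup_commit_charge(page, memcg, false);
	lru_cache_add_anon(page);
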
diff --git a/mm/swap.c b/mm/swap.c --- a/mm/swap.c +++ b/mm/swap.c | |||
@@ -687,6 +687,40 @@ void add_page_to_unevictable_list(struct page *page) | |||
687 | spin_unlock_irq(&zone->lru_lock); | 687 | spin_unlock_irq(&zone->lru_lock); |
688 | } | 688 | } |
689 | 689 | ||
690 | /** | ||
691 | * lru_cache_add_active_or_unevictable | ||
692 | * @page: the page to be added to LRU | ||
693 | * @vma: vma in which page is mapped for determining reclaimability | ||
694 | * | ||
695 | * Place @page on the active or unevictable LRU list, depending on its | ||
696 | * evictability. Note that if the page is not evictable, it goes | ||
697 | * directly back onto its zone's unevictable list; it does NOT use a | ||
698 | * per cpu pagevec. | ||
699 | */ | ||
700 | void lru_cache_add_active_or_unevictable(struct page *page, | ||
701 | struct vm_area_struct *vma) | ||
702 | { | ||
703 | VM_BUG_ON_PAGE(PageLRU(page), page); | ||
704 | |||
705 | if (likely((vma->vm_flags & (VM_LOCKED | VM_SPECIAL)) != VM_LOCKED)) { | ||
706 | SetPageActive(page); | ||
707 | lru_cache_add(page); | ||
708 | return; | ||
709 | } | ||
710 | |||
711 | if (!TestSetPageMlocked(page)) { | ||
712 | /* | ||
713 | * We use the irq-unsafe __mod_zone_page_state() because this | ||
714 | * counter is not modified from interrupt context, and the pte | ||
715 | * lock is held (a spinlock), which implies preemption is disabled. | ||
716 | */ | ||
717 | __mod_zone_page_state(page_zone(page), NR_MLOCK, | ||
718 | hpage_nr_pages(page)); | ||
719 | count_vm_event(UNEVICTABLE_PGMLOCKED); | ||
720 | } | ||
721 | add_page_to_unevictable_list(page); | ||
722 | } | ||
723 | |||
690 | /* | 724 | /* |
691 | * If the page can not be invalidated, it is moved to the | 725 | * If the page can not be invalidated, it is moved to the |
692 | * inactive list to speed up its reclaim. It is moved to the | 726 | * inactive list to speed up its reclaim. It is moved to the |
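
This helper takes over the LRU placement that the mm/rmap.c hunk above removed from page_add_new_anon_rmap(). Condensed from the converted call sites, the usage order is (variable names as in those hunks):

	page_add_new_anon_rmap(page, vma, address);		/* establish rmap */
	mem_cgroup_commit_charge(page, memcg, false);		/* bind to memcg */
	lru_cache_add_active_or_unevictable(page, vma);		/* then onto the LRU */

Placing the LRU add last presumably keeps reclaim from encountering a mapped page whose charge is still in flight.
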
diff --git a/mm/swapfile.c b/mm/swapfile.c index 4c524f7bd0bf..0883b4912ff7 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -1106,15 +1106,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, | |||
1106 | if (unlikely(!page)) | 1106 | if (unlikely(!page)) |
1107 | return -ENOMEM; | 1107 | return -ENOMEM; |
1108 | 1108 | ||
1109 | if (mem_cgroup_try_charge_swapin(vma->vm_mm, page, | 1109 | if (mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg)) { |
1110 | GFP_KERNEL, &memcg)) { | ||
1111 | ret = -ENOMEM; | 1110 | ret = -ENOMEM; |
1112 | goto out_nolock; | 1111 | goto out_nolock; |
1113 | } | 1112 | } |
1114 | 1113 | ||
1115 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 1114 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
1116 | if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) { | 1115 | if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) { |
1117 | mem_cgroup_cancel_charge_swapin(memcg); | 1116 | mem_cgroup_cancel_charge(page, memcg); |
1118 | ret = 0; | 1117 | ret = 0; |
1119 | goto out; | 1118 | goto out; |
1120 | } | 1119 | } |
@@ -1124,11 +1123,14 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, | |||
1124 | get_page(page); | 1123 | get_page(page); |
1125 | set_pte_at(vma->vm_mm, addr, pte, | 1124 | set_pte_at(vma->vm_mm, addr, pte, |
1126 | pte_mkold(mk_pte(page, vma->vm_page_prot))); | 1125 | pte_mkold(mk_pte(page, vma->vm_page_prot))); |
1127 | if (page == swapcache) | 1126 | if (page == swapcache) { |
1128 | page_add_anon_rmap(page, vma, addr); | 1127 | page_add_anon_rmap(page, vma, addr); |
1129 | else /* ksm created a completely new copy */ | 1128 | mem_cgroup_commit_charge(page, memcg, true); |
1129 | } else { /* ksm created a completely new copy */ | ||
1130 | page_add_new_anon_rmap(page, vma, addr); | 1130 | page_add_new_anon_rmap(page, vma, addr); |
1131 | mem_cgroup_commit_charge_swapin(page, memcg); | 1131 | mem_cgroup_commit_charge(page, memcg, false); |
1132 | lru_cache_add_active_or_unevictable(page, vma); | ||
1133 | } | ||
1132 | swap_free(entry); | 1134 | swap_free(entry); |
1133 | /* | 1135 | /* |
1134 | * Move the page to the active list so it is not | 1136 | * Move the page to the active list so it is not |