-rw-r--r--	mm/memcontrol.c	94
1 file changed, 28 insertions(+), 66 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1333d25163bb..31ab2c014fa1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -137,6 +137,7 @@ struct mem_cgroup {
 	 */
 	struct mem_cgroup_stat stat;
 };
+static struct mem_cgroup init_mem_cgroup;
 
 /*
  * We use the lower bit of the page->page_cgroup pointer as a bit spin
@@ -162,7 +163,7 @@ struct page_cgroup {
 	struct mem_cgroup *mem_cgroup;
 	atomic_t ref_cnt;	/* Helpful when pages move b/w */
 				/* mapped and cached states    */
-	int	 flags;
+	int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
 #define PAGE_CGROUP_FLAG_ACTIVE	(0x2)	/* page is active in this cgroup */
@@ -177,20 +178,11 @@ static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 	return page_zonenum(pc->page);
 }
 
-enum {
-	MEM_CGROUP_TYPE_UNSPEC = 0,
-	MEM_CGROUP_TYPE_MAPPED,
-	MEM_CGROUP_TYPE_CACHED,
-	MEM_CGROUP_TYPE_ALL,
-	MEM_CGROUP_TYPE_MAX,
-};
-
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
 };
 
-
 /*
  * Always modified under lru lock. Then, not necessary to preempt_disable()
  */
@@ -199,11 +191,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
 {
 	int val = (charge)? 1 : -1;
 	struct mem_cgroup_stat *stat = &mem->stat;
-	VM_BUG_ON(!irqs_disabled());
 
+	VM_BUG_ON(!irqs_disabled());
 	if (flags & PAGE_CGROUP_FLAG_CACHE)
-		__mem_cgroup_stat_add_safe(stat,
-				MEM_CGROUP_STAT_CACHE, val);
+		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val);
 	else
 		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
 }
@@ -240,8 +231,6 @@ static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem,
 	return total;
 }
 
-static struct mem_cgroup init_mem_cgroup;
-
 static inline
 struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 {
@@ -273,8 +262,7 @@ void mm_free_cgroup(struct mm_struct *mm)
 
 static inline int page_cgroup_locked(struct page *page)
 {
-	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT,
-					&page->page_cgroup);
+	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
 
 static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
@@ -285,8 +273,7 @@ static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
 
 struct page_cgroup *page_get_page_cgroup(struct page *page)
 {
-	return (struct page_cgroup *)
-		(page->page_cgroup & ~PAGE_CGROUP_LOCK);
+	return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK);
 }
 
 static void __always_inline lock_page_cgroup(struct page *page)
@@ -308,7 +295,6 @@ static void __always_inline unlock_page_cgroup(struct page *page)
  * A can can detect failure of clearing by following
  *	clear_page_cgroup(page, pc) == pc
  */
-
 static struct page_cgroup *clear_page_cgroup(struct page *page,
 					struct page_cgroup *pc)
 {
@@ -417,6 +403,7 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 	rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
 	return (int)((rss * 100L) / total);
 }
+
 /*
  * This function is called from vmscan.c. In page reclaiming loop. balance
  * between active and inactive list is calculated. For memory controller
@@ -480,7 +467,6 @@ long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
 	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
 
 	nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE);
-
 	return (nr_inactive >> priority);
 }
 
@@ -601,16 +587,11 @@ retry:
 	rcu_read_lock();
 	mem = rcu_dereference(mm->mem_cgroup);
 	/*
-	 * For every charge from the cgroup, increment reference
-	 * count
+	 * For every charge from the cgroup, increment reference count
 	 */
 	css_get(&mem->css);
 	rcu_read_unlock();
 
-	/*
-	 * If we created the page_cgroup, we should free it on exceeding
-	 * the cgroup limit.
-	 */
 	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
 		if (!(gfp_mask & __GFP_WAIT))
 			goto out;
@@ -619,12 +600,12 @@ retry:
 			continue;
 
 		/*
-		* try_to_free_mem_cgroup_pages() might not give us a full
-		* picture of reclaim. Some pages are reclaimed and might be
-		* moved to swap cache or just unmapped from the cgroup.
-		* Check the limit again to see if the reclaim reduced the
-		* current usage of the cgroup before giving up
-		*/
+		 * try_to_free_mem_cgroup_pages() might not give us a full
+		 * picture of reclaim. Some pages are reclaimed and might be
+		 * moved to swap cache or just unmapped from the cgroup.
+		 * Check the limit again to see if the reclaim reduced the
+		 * current usage of the cgroup before giving up
+		 */
 		if (res_counter_check_under_limit(&mem->res))
 			continue;
 
@@ -660,7 +641,6 @@ retry:
 
 	mz = page_cgroup_zoneinfo(pc);
 	spin_lock_irqsave(&mz->lru_lock, flags);
-	/* Update statistics vector */
 	__mem_cgroup_add_list(pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
@@ -673,26 +653,19 @@ err:
 	return -ENOMEM;
 }
 
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
-			gfp_t gfp_mask)
+int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
-/*
- * See if the cached pages should be charged at all?
- */
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
-	int ret = 0;
 	if (!mm)
 		mm = &init_mm;
-
-	ret = mem_cgroup_charge_common(page, mm, gfp_mask,
+	return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_CACHE);
-	return ret;
 }
 
 /*
@@ -742,11 +715,11 @@ unlock:
  * Returns non-zero if a page (under migration) has valid page_cgroup member.
  * Refcnt of page_cgroup is incremented.
  */
-
 int mem_cgroup_prepare_migration(struct page *page)
 {
 	struct page_cgroup *pc;
 	int ret = 0;
+
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
 	if (pc && atomic_inc_not_zero(&pc->ref_cnt))
@@ -759,28 +732,30 @@ void mem_cgroup_end_migration(struct page *page)
 {
 	mem_cgroup_uncharge_page(page);
 }
+
 /*
- * We know both *page* and *newpage* are now not-on-LRU and Pg_locked.
+ * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
  * And no race with uncharge() routines because page_cgroup for *page*
  * has extra one reference by mem_cgroup_prepare_migration.
  */
-
 void mem_cgroup_page_migration(struct page *page, struct page *newpage)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
 	unsigned long flags;
 	struct mem_cgroup_per_zone *mz;
+
 retry:
 	pc = page_get_page_cgroup(page);
 	if (!pc)
 		return;
+
 	mem = pc->mem_cgroup;
 	mz = page_cgroup_zoneinfo(pc);
 	if (clear_page_cgroup(page, pc) != pc)
 		goto retry;
-	spin_lock_irqsave(&mz->lru_lock, flags);
 
+	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_remove_list(pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
@@ -793,7 +768,6 @@ retry:
 	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_add_list(pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
-	return;
 }
 
 /*
@@ -802,8 +776,7 @@ retry:
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
  */
 #define FORCE_UNCHARGE_BATCH	(128)
-static void
-mem_cgroup_force_empty_list(struct mem_cgroup *mem,
+static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 			struct mem_cgroup_per_zone *mz,
 			int active)
 {
@@ -837,27 +810,27 @@ retry:
 		} else	/* being uncharged ? ...do relax */
 			break;
 	}
+
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
 	if (!list_empty(list)) {
 		cond_resched();
 		goto retry;
 	}
-	return;
 }
 
 /*
  * make mem_cgroup's charge to be 0 if there is no task.
  * This enables deleting this mem_cgroup.
  */
-
 int mem_cgroup_force_empty(struct mem_cgroup *mem)
 {
 	int ret = -EBUSY;
 	int node, zid;
+
 	css_get(&mem->css);
 	/*
 	 * page reclaim code (kswapd etc..) will move pages between
-`	 * active_list <-> inactive_list while we don't take a lock.
+	 * active_list <-> inactive_list while we don't take a lock.
 	 * So, we have to do loop here until all lists are empty.
 	 */
 	while (mem->res.usage > 0) {
@@ -879,8 +852,6 @@ out:
 	return ret;
 }
 
-
-
 int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
 {
 	*tmp = memparse(buf, &buf);
@@ -918,8 +889,7 @@ static ssize_t mem_force_empty_write(struct cgroup *cont,
 				size_t nbytes, loff_t *ppos)
 {
 	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
-	int ret;
-	ret = mem_cgroup_force_empty(mem);
+	int ret = mem_cgroup_force_empty(mem);
 	if (!ret)
 		ret = nbytes;
 	return ret;
@@ -928,7 +898,6 @@ static ssize_t mem_force_empty_write(struct cgroup *cont,
 /*
  * Note: This should be removed if cgroup supports write-only file.
  */
-
 static ssize_t mem_force_empty_read(struct cgroup *cont,
 				struct cftype *cft,
 				struct file *file, char __user *userbuf,
@@ -937,7 +906,6 @@ static ssize_t mem_force_empty_read(struct cgroup *cont,
 	return -EINVAL;
 }
 
-
 static const struct mem_cgroup_stat_desc {
 	const char *msg;
 	u64 unit;
@@ -990,8 +958,6 @@ static int mem_control_stat_open(struct inode *unused, struct file *file)
 	return single_open(file, mem_control_stat_show, cont);
 }
 
-
-
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -1057,9 +1023,6 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 	kfree(mem->info.nodeinfo[node]);
 }
 
-
-static struct mem_cgroup init_mem_cgroup;
-
 static struct cgroup_subsys_state *
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
@@ -1149,7 +1112,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 
 out:
 	mmput(mm);
-	return;
 }
 
 struct cgroup_subsys mem_cgroup_subsys = {
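
The locking trick visible in page_cgroup_locked() and page_get_page_cgroup() above is worth spelling out: a bit spinlock lives in the low bit of the page->page_cgroup word, so the pointer and its lock are updated as a single word. The kernel does this with bit_spin_lock()/bit_spin_unlock() on PAGE_CGROUP_LOCK_BIT; the sketch below is only a minimal userspace analogue using C11 atomics, and every name in it (slot, slot_lock, slot_get) is hypothetical, not kernel API.

/*
 * Illustrative sketch: a lock bit hidden in the low bit of an aligned
 * pointer. Word-aligned objects never use bit 0, so it is free to act
 * as a spinlock while the upper bits still hold the pointer.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define LOCK_BIT	0x1UL	/* low bit of an aligned pointer is free */

static _Atomic uintptr_t slot;	/* plays the role of page->page_cgroup */

static void slot_lock(void)
{
	uintptr_t old;

	do {	/* expect the lock bit clear; retry until we set it */
		old = atomic_load(&slot) & ~LOCK_BIT;
	} while (!atomic_compare_exchange_weak(&slot, &old, old | LOCK_BIT));
}

static void slot_unlock(void)
{
	atomic_fetch_and(&slot, ~LOCK_BIT);	/* drop the lock bit */
}

static void *slot_get(void)
{
	/* mask the lock bit off before using the value as a pointer */
	return (void *)(atomic_load(&slot) & ~LOCK_BIT);
}

int main(void)
{
	static int payload = 42;	/* aligned, so bit 0 is unused */

	atomic_store(&slot, (uintptr_t)&payload);
	slot_lock();
	printf("locked, payload = %d\n", *(int *)slot_get());
	slot_unlock();
	return 0;
}

The payoff of this layout, and the reason the patch keeps masking with ~PAGE_CGROUP_LOCK, is that no separate lock word is needed per page: readers must strip the bit before dereferencing, and writers must hold it while reassigning the pointer.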