-rw-r--r--  mm/memcontrol.c  94
1 files changed, 28 insertions, 66 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1333d25163bb..31ab2c014fa1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -137,6 +137,7 @@ struct mem_cgroup {
 	 */
 	struct mem_cgroup_stat stat;
 };
+static struct mem_cgroup init_mem_cgroup;
 
 /*
  * We use the lower bit of the page->page_cgroup pointer as a bit spin
@@ -162,7 +163,7 @@ struct page_cgroup {
 	struct mem_cgroup *mem_cgroup;
 	atomic_t ref_cnt;		/* Helpful when pages move b/w */
 					/* mapped and cached states */
-	int	 flags;
+	int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE	(0x1)	/* charged as cache */
 #define PAGE_CGROUP_FLAG_ACTIVE (0x2)	/* page is active in this cgroup */
@@ -177,20 +178,11 @@ static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
 	return page_zonenum(pc->page);
 }
 
-enum {
-	MEM_CGROUP_TYPE_UNSPEC = 0,
-	MEM_CGROUP_TYPE_MAPPED,
-	MEM_CGROUP_TYPE_CACHED,
-	MEM_CGROUP_TYPE_ALL,
-	MEM_CGROUP_TYPE_MAX,
-};
-
 enum charge_type {
 	MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
 	MEM_CGROUP_CHARGE_TYPE_MAPPED,
 };
 
-
 /*
  * Always modified under lru lock. Then, not necessary to preempt_disable()
  */
@@ -199,11 +191,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, int flags,
 {
 	int val = (charge)? 1 : -1;
 	struct mem_cgroup_stat *stat = &mem->stat;
-	VM_BUG_ON(!irqs_disabled());
 
+	VM_BUG_ON(!irqs_disabled());
 	if (flags & PAGE_CGROUP_FLAG_CACHE)
-		__mem_cgroup_stat_add_safe(stat,
-				MEM_CGROUP_STAT_CACHE, val);
+		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_CACHE, val);
 	else
 		__mem_cgroup_stat_add_safe(stat, MEM_CGROUP_STAT_RSS, val);
 }
@@ -240,8 +231,6 @@ static unsigned long mem_cgroup_get_all_zonestat(struct mem_cgroup *mem,
 	return total;
 }
 
-static struct mem_cgroup init_mem_cgroup;
-
 static inline
 struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 {
@@ -273,8 +262,7 @@ void mm_free_cgroup(struct mm_struct *mm)
 
 static inline int page_cgroup_locked(struct page *page)
 {
-	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT,
-					&page->page_cgroup);
+	return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
 
 static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
@@ -285,8 +273,7 @@ static void page_assign_page_cgroup(struct page *page, struct page_cgroup *pc)
 
 struct page_cgroup *page_get_page_cgroup(struct page *page)
 {
-	return (struct page_cgroup *)
-		(page->page_cgroup & ~PAGE_CGROUP_LOCK);
+	return (struct page_cgroup *) (page->page_cgroup & ~PAGE_CGROUP_LOCK);
 }
 
 static void __always_inline lock_page_cgroup(struct page *page)
@@ -308,7 +295,6 @@ static void __always_inline unlock_page_cgroup(struct page *page)
  * A can can detect failure of clearing by following
  * clear_page_cgroup(page, pc) == pc
  */
-
 static struct page_cgroup *clear_page_cgroup(struct page *page,
 					struct page_cgroup *pc)
 {
@@ -417,6 +403,7 @@ int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
 	rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
 	return (int)((rss * 100L) / total);
 }
+
 /*
  * This function is called from vmscan.c. In page reclaiming loop. balance
  * between active and inactive list is calculated. For memory controller
@@ -480,7 +467,6 @@ long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
 	struct mem_cgroup_per_zone *mz = mem_cgroup_zoneinfo(mem, nid, zid);
 
 	nr_inactive = MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE);
-
 	return (nr_inactive >> priority);
 }
 
@@ -601,16 +587,11 @@ retry:
 	rcu_read_lock();
 	mem = rcu_dereference(mm->mem_cgroup);
 	/*
-	 * For every charge from the cgroup, increment reference
-	 * count
+	 * For every charge from the cgroup, increment reference count
 	 */
 	css_get(&mem->css);
 	rcu_read_unlock();
 
-	/*
-	 * If we created the page_cgroup, we should free it on exceeding
-	 * the cgroup limit.
-	 */
 	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
 		if (!(gfp_mask & __GFP_WAIT))
 			goto out;
@@ -619,12 +600,12 @@ retry:
 		continue;
 
 		/*
-		  * try_to_free_mem_cgroup_pages() might not give us a full
-		  * picture of reclaim. Some pages are reclaimed and might be
-		  * moved to swap cache or just unmapped from the cgroup.
-		  * Check the limit again to see if the reclaim reduced the
-		  * current usage of the cgroup before giving up
-		  */
+		 * try_to_free_mem_cgroup_pages() might not give us a full
+		 * picture of reclaim. Some pages are reclaimed and might be
+		 * moved to swap cache or just unmapped from the cgroup.
+		 * Check the limit again to see if the reclaim reduced the
+		 * current usage of the cgroup before giving up
+		 */
 		if (res_counter_check_under_limit(&mem->res))
 			continue;
 
@@ -660,7 +641,6 @@ retry:
 
 	mz = page_cgroup_zoneinfo(pc);
 	spin_lock_irqsave(&mz->lru_lock, flags);
-	/* Update statistics vector */
 	__mem_cgroup_add_list(pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
@@ -673,26 +653,19 @@ err:
 	return -ENOMEM;
 }
 
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
-			gfp_t gfp_mask)
+int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
 {
 	return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_MAPPED);
 }
 
-/*
- * See if the cached pages should be charged at all?
- */
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
-	int ret = 0;
 	if (!mm)
 		mm = &init_mm;
-
-	ret = mem_cgroup_charge_common(page, mm, gfp_mask,
+	return mem_cgroup_charge_common(page, mm, gfp_mask,
 				MEM_CGROUP_CHARGE_TYPE_CACHE);
-	return ret;
 }
 
 /*
@@ -742,11 +715,11 @@ unlock:
  * Returns non-zero if a page (under migration) has valid page_cgroup member.
  * Refcnt of page_cgroup is incremented.
  */
-
 int mem_cgroup_prepare_migration(struct page *page)
 {
 	struct page_cgroup *pc;
 	int ret = 0;
+
 	lock_page_cgroup(page);
 	pc = page_get_page_cgroup(page);
 	if (pc && atomic_inc_not_zero(&pc->ref_cnt))
@@ -759,28 +732,30 @@ void mem_cgroup_end_migration(struct page *page)
 {
 	mem_cgroup_uncharge_page(page);
 }
+
 /*
- * We know both *page* and *newpage* are now not-on-LRU and Pg_locked.
+ * We know both *page* and *newpage* are now not-on-LRU and PG_locked.
  * And no race with uncharge() routines because page_cgroup for *page*
  * has extra one reference by mem_cgroup_prepare_migration.
  */
-
 void mem_cgroup_page_migration(struct page *page, struct page *newpage)
 {
 	struct page_cgroup *pc;
 	struct mem_cgroup *mem;
 	unsigned long flags;
 	struct mem_cgroup_per_zone *mz;
+
 retry:
 	pc = page_get_page_cgroup(page);
 	if (!pc)
 		return;
+
 	mem = pc->mem_cgroup;
 	mz = page_cgroup_zoneinfo(pc);
 	if (clear_page_cgroup(page, pc) != pc)
 		goto retry;
-	spin_lock_irqsave(&mz->lru_lock, flags);
 
+	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_remove_list(pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
 
@@ -793,7 +768,6 @@ retry:
 	spin_lock_irqsave(&mz->lru_lock, flags);
 	__mem_cgroup_add_list(pc);
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
-	return;
 }
 
 /*
@@ -802,8 +776,7 @@ retry:
  * *And* this routine doesn't reclaim page itself, just removes page_cgroup.
  */
 #define FORCE_UNCHARGE_BATCH	(128)
-static void
-mem_cgroup_force_empty_list(struct mem_cgroup *mem,
+static void mem_cgroup_force_empty_list(struct mem_cgroup *mem,
 			struct mem_cgroup_per_zone *mz,
 			int active)
 {
@@ -837,27 +810,27 @@ retry:
 		} else	/* being uncharged ? ...do relax */
 			break;
 	}
+
 	spin_unlock_irqrestore(&mz->lru_lock, flags);
 	if (!list_empty(list)) {
 		cond_resched();
 		goto retry;
 	}
-	return;
 }
 
 /*
  * make mem_cgroup's charge to be 0 if there is no task.
  * This enables deleting this mem_cgroup.
  */
-
 int mem_cgroup_force_empty(struct mem_cgroup *mem)
 {
 	int ret = -EBUSY;
 	int node, zid;
+
 	css_get(&mem->css);
 	/*
 	 * page reclaim code (kswapd etc..) will move pages between
 	 * active_list <-> inactive_list while we don't take a lock.
 	 * So, we have to do loop here until all lists are empty.
 	 */
 	while (mem->res.usage > 0) {
@@ -879,8 +852,6 @@ out:
 	return ret;
 }
 
-
-
 int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
 {
 	*tmp = memparse(buf, &buf);
@@ -918,8 +889,7 @@ static ssize_t mem_force_empty_write(struct cgroup *cont,
 				size_t nbytes, loff_t *ppos)
 {
 	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
-	int ret;
-	ret = mem_cgroup_force_empty(mem);
+	int ret = mem_cgroup_force_empty(mem);
 	if (!ret)
 		ret = nbytes;
 	return ret;
@@ -928,7 +898,6 @@ static ssize_t mem_force_empty_write(struct cgroup *cont,
 /*
  * Note: This should be removed if cgroup supports write-only file.
  */
-
 static ssize_t mem_force_empty_read(struct cgroup *cont,
 				struct cftype *cft,
 				struct file *file, char __user *userbuf,
@@ -937,7 +906,6 @@ static ssize_t mem_force_empty_read(struct cgroup *cont,
 	return -EINVAL;
 }
 
-
 static const struct mem_cgroup_stat_desc {
 	const char *msg;
 	u64 unit;
@@ -990,8 +958,6 @@ static int mem_control_stat_open(struct inode *unused, struct file *file)
 	return single_open(file, mem_control_stat_show, cont);
 }
 
-
-
 static struct cftype mem_cgroup_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -1057,9 +1023,6 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
 	kfree(mem->info.nodeinfo[node]);
 }
 
-
-static struct mem_cgroup init_mem_cgroup;
-
 static struct cgroup_subsys_state *
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
@@ -1149,7 +1112,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 
 out:
 	mmput(mm);
-	return;
 }
 
 struct cgroup_subsys mem_cgroup_subsys = {