about summary refs log tree commit diff stats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- mm/memcontrol.c 179
1 files changed, 85 insertions, 94 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2e0bfc93484b..33add96cd5fb 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -26,15 +26,18 @@
26#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
27#include <linux/bit_spinlock.h> 27#include <linux/bit_spinlock.h>
28#include <linux/rcupdate.h> 28#include <linux/rcupdate.h>
29#include <linux/slab.h>
29#include <linux/swap.h> 30#include <linux/swap.h>
30#include <linux/spinlock.h> 31#include <linux/spinlock.h>
31#include <linux/fs.h> 32#include <linux/fs.h>
32#include <linux/seq_file.h> 33#include <linux/seq_file.h>
34#include <linux/vmalloc.h>
33 35
34#include <asm/uaccess.h> 36#include <asm/uaccess.h>
35 37
36struct cgroup_subsys mem_cgroup_subsys; 38struct cgroup_subsys mem_cgroup_subsys;
37static const int MEM_CGROUP_RECLAIM_RETRIES = 5; 39static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
40static struct kmem_cache *page_cgroup_cache;
38 41
39/* 42/*
40 * Statistics for memory cgroup. 43 * Statistics for memory cgroup.
@@ -236,26 +239,12 @@ static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
236 css); 239 css);
237} 240}
238 241
239static struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) 242struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
240{ 243{
241 return container_of(task_subsys_state(p, mem_cgroup_subsys_id), 244 return container_of(task_subsys_state(p, mem_cgroup_subsys_id),
242 struct mem_cgroup, css); 245 struct mem_cgroup, css);
243} 246}
244 247
245void mm_init_cgroup(struct mm_struct *mm, struct task_struct *p)
246{
247 struct mem_cgroup *mem;
248
249 mem = mem_cgroup_from_task(p);
250 css_get(&mem->css);
251 mm->mem_cgroup = mem;
252}
253
254void mm_free_cgroup(struct mm_struct *mm)
255{
256 css_put(&mm->mem_cgroup->css);
257}
258
259static inline int page_cgroup_locked(struct page *page) 248static inline int page_cgroup_locked(struct page *page)
260{ 249{
261 return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); 250 return bit_spin_is_locked(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
@@ -287,10 +276,10 @@ static void unlock_page_cgroup(struct page *page)
287 bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup); 276 bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
288} 277}
289 278
290static void __mem_cgroup_remove_list(struct page_cgroup *pc) 279static void __mem_cgroup_remove_list(struct mem_cgroup_per_zone *mz,
280 struct page_cgroup *pc)
291{ 281{
292 int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; 282 int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
293 struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
294 283
295 if (from) 284 if (from)
296 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1; 285 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) -= 1;
@@ -301,10 +290,10 @@ static void __mem_cgroup_remove_list(struct page_cgroup *pc)
301 list_del_init(&pc->lru); 290 list_del_init(&pc->lru);
302} 291}
303 292
304static void __mem_cgroup_add_list(struct page_cgroup *pc) 293static void __mem_cgroup_add_list(struct mem_cgroup_per_zone *mz,
294 struct page_cgroup *pc)
305{ 295{
306 int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE; 296 int to = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
307 struct mem_cgroup_per_zone *mz = page_cgroup_zoneinfo(pc);
308 297
309 if (!to) { 298 if (!to) {
310 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1; 299 MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
@@ -476,6 +465,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
476 int zid = zone_idx(z); 465 int zid = zone_idx(z);
477 struct mem_cgroup_per_zone *mz; 466 struct mem_cgroup_per_zone *mz;
478 467
468 BUG_ON(!mem_cont);
479 mz = mem_cgroup_zoneinfo(mem_cont, nid, zid); 469 mz = mem_cgroup_zoneinfo(mem_cont, nid, zid);
480 if (active) 470 if (active)
481 src = &mz->active_list; 471 src = &mz->active_list;
@@ -560,7 +550,7 @@ retry:
560 } 550 }
561 unlock_page_cgroup(page); 551 unlock_page_cgroup(page);
562 552
563 pc = kzalloc(sizeof(struct page_cgroup), gfp_mask); 553 pc = kmem_cache_zalloc(page_cgroup_cache, gfp_mask);
564 if (pc == NULL) 554 if (pc == NULL)
565 goto err; 555 goto err;
566 556
@@ -574,7 +564,7 @@ retry:
574 mm = &init_mm; 564 mm = &init_mm;
575 565
576 rcu_read_lock(); 566 rcu_read_lock();
577 mem = rcu_dereference(mm->mem_cgroup); 567 mem = mem_cgroup_from_task(rcu_dereference(mm->owner));
578 /* 568 /*
579 * For every charge from the cgroup, increment reference count 569 * For every charge from the cgroup, increment reference count
580 */ 570 */
@@ -602,7 +592,6 @@ retry:
602 mem_cgroup_out_of_memory(mem, gfp_mask); 592 mem_cgroup_out_of_memory(mem, gfp_mask);
603 goto out; 593 goto out;
604 } 594 }
605 congestion_wait(WRITE, HZ/10);
606 } 595 }
607 596
608 pc->ref_cnt = 1; 597 pc->ref_cnt = 1;
@@ -610,7 +599,7 @@ retry:
610 pc->page = page; 599 pc->page = page;
611 pc->flags = PAGE_CGROUP_FLAG_ACTIVE; 600 pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
612 if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) 601 if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
613 pc->flags |= PAGE_CGROUP_FLAG_CACHE; 602 pc->flags = PAGE_CGROUP_FLAG_CACHE;
614 603
615 lock_page_cgroup(page); 604 lock_page_cgroup(page);
616 if (page_get_page_cgroup(page)) { 605 if (page_get_page_cgroup(page)) {
@@ -622,14 +611,14 @@ retry:
622 */ 611 */
623 res_counter_uncharge(&mem->res, PAGE_SIZE); 612 res_counter_uncharge(&mem->res, PAGE_SIZE);
624 css_put(&mem->css); 613 css_put(&mem->css);
625 kfree(pc); 614 kmem_cache_free(page_cgroup_cache, pc);
626 goto retry; 615 goto retry;
627 } 616 }
628 page_assign_page_cgroup(page, pc); 617 page_assign_page_cgroup(page, pc);
629 618
630 mz = page_cgroup_zoneinfo(pc); 619 mz = page_cgroup_zoneinfo(pc);
631 spin_lock_irqsave(&mz->lru_lock, flags); 620 spin_lock_irqsave(&mz->lru_lock, flags);
632 __mem_cgroup_add_list(pc); 621 __mem_cgroup_add_list(mz, pc);
633 spin_unlock_irqrestore(&mz->lru_lock, flags); 622 spin_unlock_irqrestore(&mz->lru_lock, flags);
634 623
635 unlock_page_cgroup(page); 624 unlock_page_cgroup(page);
@@ -637,7 +626,7 @@ done:
637 return 0; 626 return 0;
638out: 627out:
639 css_put(&mem->css); 628 css_put(&mem->css);
640 kfree(pc); 629 kmem_cache_free(page_cgroup_cache, pc);
641err: 630err:
642 return -ENOMEM; 631 return -ENOMEM;
643} 632}
@@ -685,7 +674,7 @@ void mem_cgroup_uncharge_page(struct page *page)
685 if (--(pc->ref_cnt) == 0) { 674 if (--(pc->ref_cnt) == 0) {
686 mz = page_cgroup_zoneinfo(pc); 675 mz = page_cgroup_zoneinfo(pc);
687 spin_lock_irqsave(&mz->lru_lock, flags); 676 spin_lock_irqsave(&mz->lru_lock, flags);
688 __mem_cgroup_remove_list(pc); 677 __mem_cgroup_remove_list(mz, pc);
689 spin_unlock_irqrestore(&mz->lru_lock, flags); 678 spin_unlock_irqrestore(&mz->lru_lock, flags);
690 679
691 page_assign_page_cgroup(page, NULL); 680 page_assign_page_cgroup(page, NULL);
@@ -695,7 +684,7 @@ void mem_cgroup_uncharge_page(struct page *page)
695 res_counter_uncharge(&mem->res, PAGE_SIZE); 684 res_counter_uncharge(&mem->res, PAGE_SIZE);
696 css_put(&mem->css); 685 css_put(&mem->css);
697 686
698 kfree(pc); 687 kmem_cache_free(page_cgroup_cache, pc);
699 return; 688 return;
700 } 689 }
701 690
@@ -747,7 +736,7 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
747 736
748 mz = page_cgroup_zoneinfo(pc); 737 mz = page_cgroup_zoneinfo(pc);
749 spin_lock_irqsave(&mz->lru_lock, flags); 738 spin_lock_irqsave(&mz->lru_lock, flags);
750 __mem_cgroup_remove_list(pc); 739 __mem_cgroup_remove_list(mz, pc);
751 spin_unlock_irqrestore(&mz->lru_lock, flags); 740 spin_unlock_irqrestore(&mz->lru_lock, flags);
752 741
753 page_assign_page_cgroup(page, NULL); 742 page_assign_page_cgroup(page, NULL);
@@ -759,7 +748,7 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
759 748
760 mz = page_cgroup_zoneinfo(pc); 749 mz = page_cgroup_zoneinfo(pc);
761 spin_lock_irqsave(&mz->lru_lock, flags); 750 spin_lock_irqsave(&mz->lru_lock, flags);
762 __mem_cgroup_add_list(pc); 751 __mem_cgroup_add_list(mz, pc);
763 spin_unlock_irqrestore(&mz->lru_lock, flags); 752 spin_unlock_irqrestore(&mz->lru_lock, flags);
764 753
765 unlock_page_cgroup(newpage); 754 unlock_page_cgroup(newpage);
@@ -853,13 +842,10 @@ static int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
853 return 0; 842 return 0;
854} 843}
855 844
856static ssize_t mem_cgroup_read(struct cgroup *cont, 845static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
857 struct cftype *cft, struct file *file,
858 char __user *userbuf, size_t nbytes, loff_t *ppos)
859{ 846{
860 return res_counter_read(&mem_cgroup_from_cont(cont)->res, 847 return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
861 cft->private, userbuf, nbytes, ppos, 848 cft->private);
862 NULL);
863} 849}
864 850
865static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft, 851static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
@@ -871,27 +857,25 @@ static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
871 mem_cgroup_write_strategy); 857 mem_cgroup_write_strategy);
872} 858}
873 859
874static ssize_t mem_force_empty_write(struct cgroup *cont, 860static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
875 struct cftype *cft, struct file *file,
876 const char __user *userbuf,
877 size_t nbytes, loff_t *ppos)
878{ 861{
879 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 862 struct mem_cgroup *mem;
880 int ret = mem_cgroup_force_empty(mem); 863
881 if (!ret) 864 mem = mem_cgroup_from_cont(cont);
882 ret = nbytes; 865 switch (event) {
883 return ret; 866 case RES_MAX_USAGE:
867 res_counter_reset_max(&mem->res);
868 break;
869 case RES_FAILCNT:
870 res_counter_reset_failcnt(&mem->res);
871 break;
872 }
873 return 0;
884} 874}
885 875
886/* 876static int mem_force_empty_write(struct cgroup *cont, unsigned int event)
887 * Note: This should be removed if cgroup supports write-only file.
888 */
889static ssize_t mem_force_empty_read(struct cgroup *cont,
890 struct cftype *cft,
891 struct file *file, char __user *userbuf,
892 size_t nbytes, loff_t *ppos)
893{ 877{
894 return -EINVAL; 878 return mem_cgroup_force_empty(mem_cgroup_from_cont(cont));
895} 879}
896 880
897static const struct mem_cgroup_stat_desc { 881static const struct mem_cgroup_stat_desc {
@@ -902,9 +886,9 @@ static const struct mem_cgroup_stat_desc {
902 [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, }, 886 [MEM_CGROUP_STAT_RSS] = { "rss", PAGE_SIZE, },
903}; 887};
904 888
905static int mem_control_stat_show(struct seq_file *m, void *arg) 889static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
890 struct cgroup_map_cb *cb)
906{ 891{
907 struct cgroup *cont = m->private;
908 struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont); 892 struct mem_cgroup *mem_cont = mem_cgroup_from_cont(cont);
909 struct mem_cgroup_stat *stat = &mem_cont->stat; 893 struct mem_cgroup_stat *stat = &mem_cont->stat;
910 int i; 894 int i;
@@ -914,8 +898,7 @@ static int mem_control_stat_show(struct seq_file *m, void *arg)
914 898
915 val = mem_cgroup_read_stat(stat, i); 899 val = mem_cgroup_read_stat(stat, i);
916 val *= mem_cgroup_stat_desc[i].unit; 900 val *= mem_cgroup_stat_desc[i].unit;
917 seq_printf(m, "%s %lld\n", mem_cgroup_stat_desc[i].msg, 901 cb->fill(cb, mem_cgroup_stat_desc[i].msg, val);
918 (long long)val);
919 } 902 }
920 /* showing # of active pages */ 903 /* showing # of active pages */
921 { 904 {
@@ -925,52 +908,43 @@ static int mem_control_stat_show(struct seq_file *m, void *arg)
925 MEM_CGROUP_ZSTAT_INACTIVE); 908 MEM_CGROUP_ZSTAT_INACTIVE);
926 active = mem_cgroup_get_all_zonestat(mem_cont, 909 active = mem_cgroup_get_all_zonestat(mem_cont,
927 MEM_CGROUP_ZSTAT_ACTIVE); 910 MEM_CGROUP_ZSTAT_ACTIVE);
928 seq_printf(m, "active %ld\n", (active) * PAGE_SIZE); 911 cb->fill(cb, "active", (active) * PAGE_SIZE);
929 seq_printf(m, "inactive %ld\n", (inactive) * PAGE_SIZE); 912 cb->fill(cb, "inactive", (inactive) * PAGE_SIZE);
930 } 913 }
931 return 0; 914 return 0;
932} 915}
933 916
934static const struct file_operations mem_control_stat_file_operations = {
935 .read = seq_read,
936 .llseek = seq_lseek,
937 .release = single_release,
938};
939
940static int mem_control_stat_open(struct inode *unused, struct file *file)
941{
942 /* XXX __d_cont */
943 struct cgroup *cont = file->f_dentry->d_parent->d_fsdata;
944
945 file->f_op = &mem_control_stat_file_operations;
946 return single_open(file, mem_control_stat_show, cont);
947}
948
949static struct cftype mem_cgroup_files[] = { 917static struct cftype mem_cgroup_files[] = {
950 { 918 {
951 .name = "usage_in_bytes", 919 .name = "usage_in_bytes",
952 .private = RES_USAGE, 920 .private = RES_USAGE,
953 .read = mem_cgroup_read, 921 .read_u64 = mem_cgroup_read,
922 },
923 {
924 .name = "max_usage_in_bytes",
925 .private = RES_MAX_USAGE,
926 .trigger = mem_cgroup_reset,
927 .read_u64 = mem_cgroup_read,
954 }, 928 },
955 { 929 {
956 .name = "limit_in_bytes", 930 .name = "limit_in_bytes",
957 .private = RES_LIMIT, 931 .private = RES_LIMIT,
958 .write = mem_cgroup_write, 932 .write = mem_cgroup_write,
959 .read = mem_cgroup_read, 933 .read_u64 = mem_cgroup_read,
960 }, 934 },
961 { 935 {
962 .name = "failcnt", 936 .name = "failcnt",
963 .private = RES_FAILCNT, 937 .private = RES_FAILCNT,
964 .read = mem_cgroup_read, 938 .trigger = mem_cgroup_reset,
939 .read_u64 = mem_cgroup_read,
965 }, 940 },
966 { 941 {
967 .name = "force_empty", 942 .name = "force_empty",
968 .write = mem_force_empty_write, 943 .trigger = mem_force_empty_write,
969 .read = mem_force_empty_read,
970 }, 944 },
971 { 945 {
972 .name = "stat", 946 .name = "stat",
973 .open = mem_control_stat_open, 947 .read_map = mem_control_stat_show,
974 }, 948 },
975}; 949};
976 950
@@ -1010,6 +984,29 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
1010 kfree(mem->info.nodeinfo[node]); 984 kfree(mem->info.nodeinfo[node]);
1011} 985}
1012 986
987static struct mem_cgroup *mem_cgroup_alloc(void)
988{
989 struct mem_cgroup *mem;
990
991 if (sizeof(*mem) < PAGE_SIZE)
992 mem = kmalloc(sizeof(*mem), GFP_KERNEL);
993 else
994 mem = vmalloc(sizeof(*mem));
995
996 if (mem)
997 memset(mem, 0, sizeof(*mem));
998 return mem;
999}
1000
1001static void mem_cgroup_free(struct mem_cgroup *mem)
1002{
1003 if (sizeof(*mem) < PAGE_SIZE)
1004 kfree(mem);
1005 else
1006 vfree(mem);
1007}
1008
1009
1013static struct cgroup_subsys_state * 1010static struct cgroup_subsys_state *
1014mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) 1011mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
1015{ 1012{
@@ -1018,17 +1015,15 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
1018 1015
1019 if (unlikely((cont->parent) == NULL)) { 1016 if (unlikely((cont->parent) == NULL)) {
1020 mem = &init_mem_cgroup; 1017 mem = &init_mem_cgroup;
1021 init_mm.mem_cgroup = mem; 1018 page_cgroup_cache = KMEM_CACHE(page_cgroup, SLAB_PANIC);
1022 } else 1019 } else {
1023 mem = kzalloc(sizeof(struct mem_cgroup), GFP_KERNEL); 1020 mem = mem_cgroup_alloc();
1024 1021 if (!mem)
1025 if (mem == NULL) 1022 return ERR_PTR(-ENOMEM);
1026 return ERR_PTR(-ENOMEM); 1023 }
1027 1024
1028 res_counter_init(&mem->res); 1025 res_counter_init(&mem->res);
1029 1026
1030 memset(&mem->info, 0, sizeof(mem->info));
1031
1032 for_each_node_state(node, N_POSSIBLE) 1027 for_each_node_state(node, N_POSSIBLE)
1033 if (alloc_mem_cgroup_per_zone_info(mem, node)) 1028 if (alloc_mem_cgroup_per_zone_info(mem, node))
1034 goto free_out; 1029 goto free_out;
@@ -1038,7 +1033,7 @@ free_out:
1038 for_each_node_state(node, N_POSSIBLE) 1033 for_each_node_state(node, N_POSSIBLE)
1039 free_mem_cgroup_per_zone_info(mem, node); 1034 free_mem_cgroup_per_zone_info(mem, node);
1040 if (cont->parent != NULL) 1035 if (cont->parent != NULL)
1041 kfree(mem); 1036 mem_cgroup_free(mem);
1042 return ERR_PTR(-ENOMEM); 1037 return ERR_PTR(-ENOMEM);
1043} 1038}
1044 1039
@@ -1058,7 +1053,7 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
1058 for_each_node_state(node, N_POSSIBLE) 1053 for_each_node_state(node, N_POSSIBLE)
1059 free_mem_cgroup_per_zone_info(mem, node); 1054 free_mem_cgroup_per_zone_info(mem, node);
1060 1055
1061 kfree(mem_cgroup_from_cont(cont)); 1056 mem_cgroup_free(mem_cgroup_from_cont(cont));
1062} 1057}
1063 1058
1064static int mem_cgroup_populate(struct cgroup_subsys *ss, 1059static int mem_cgroup_populate(struct cgroup_subsys *ss,
@@ -1098,10 +1093,6 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
1098 if (!thread_group_leader(p)) 1093 if (!thread_group_leader(p))
1099 goto out; 1094 goto out;
1100 1095
1101 css_get(&mem->css);
1102 rcu_assign_pointer(mm->mem_cgroup, mem);
1103 css_put(&old_mem->css);
1104
1105out: 1096out:
1106 mmput(mm); 1097 mmput(mm);
1107} 1098}