aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/controllers/memory.txt27
-rw-r--r--mm/memcontrol.c41
2 files changed, 57 insertions, 11 deletions
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt
index 58f32c166fac..54253b7a8db2 100644
--- a/Documentation/controllers/memory.txt
+++ b/Documentation/controllers/memory.txt
@@ -237,11 +237,30 @@ reclaimed.
237A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a 237A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
238cgroup might have some charge associated with it, even though all 238cgroup might have some charge associated with it, even though all
239tasks have migrated away from it. 239tasks have migrated away from it.
240Such charges are moved to its parent as much as possible and freed if parent 240Such charges are freed(at default) or moved to its parent. When moved,
241is full. Both of RSS and CACHES are moved to parent. 241both of RSS and CACHES are moved to parent.
242If both of them are busy, rmdir() returns -EBUSY. 242If both of them are busy, rmdir() returns -EBUSY. See 5.1 Also.
243 243
2445. TODO 2445. Misc. interfaces.
245
2465.1 force_empty
247 The memory.force_empty interface is provided to make a cgroup's memory usage empty.
248 You can use this interface only when the cgroup has no tasks.
249 Writing anything to this file triggers it, for example:
250
251 # echo 0 > memory.force_empty
252
253 Almost all pages tracked by this memcg will be unmapped and freed. Some
254 pages cannot be freed because they are locked or in use. Such pages are moved
255 to the parent, and this cgroup will be empty. However, the write may return
256 -EBUSY if the cgroup is too busy.
257
258 A typical use case of this interface is calling it before rmdir().
259 Because rmdir() moves all pages to the parent, some out-of-use page caches can
260 be moved to the parent. If you want to avoid that, force_empty will be useful.
261
262
2636. TODO
245 264
2461. Add support for accounting huge pages (as a separate controller) 2651. Add support for accounting huge pages (as a separate controller)
2472. Make per-cgroup scanner reclaim not-shared pages first 2662. Make per-cgroup scanner reclaim not-shared pages first
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e00f25e6545f..decace3bb57e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1062,21 +1062,27 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem,
1062 * make mem_cgroup's charge to be 0 if there is no task. 1062 * make mem_cgroup's charge to be 0 if there is no task.
1063 * This enables deleting this mem_cgroup. 1063 * This enables deleting this mem_cgroup.
1064 */ 1064 */
1065static int mem_cgroup_force_empty(struct mem_cgroup *mem) 1065static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all)
1066{ 1066{
1067 int ret; 1067 int ret;
1068 int node, zid, shrink; 1068 int node, zid, shrink;
1069 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; 1069 int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
1070 struct cgroup *cgrp = mem->css.cgroup;
1070 1071
1071 css_get(&mem->css); 1072 css_get(&mem->css);
1072 1073
1073 shrink = 0; 1074 shrink = 0;
1075 /* should free all ? */
1076 if (free_all)
1077 goto try_to_free;
1074move_account: 1078move_account:
1075 while (mem->res.usage > 0) { 1079 while (mem->res.usage > 0) {
1076 ret = -EBUSY; 1080 ret = -EBUSY;
1077 if (atomic_read(&mem->css.cgroup->count) > 0) 1081 if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children))
1082 goto out;
1083 ret = -EINTR;
1084 if (signal_pending(current))
1078 goto out; 1085 goto out;
1079
1080 /* This is for making all *used* pages to be on LRU. */ 1086 /* This is for making all *used* pages to be on LRU. */
1081 lru_add_drain_all(); 1087 lru_add_drain_all();
1082 ret = 0; 1088 ret = 0;
@@ -1106,19 +1112,29 @@ out:
1106 return ret; 1112 return ret;
1107 1113
1108try_to_free: 1114try_to_free:
1109 /* returns EBUSY if we come here twice. */ 1115 /* returns EBUSY if there is a task or if we come here twice. */
1110 if (shrink) { 1116 if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) {
1111 ret = -EBUSY; 1117 ret = -EBUSY;
1112 goto out; 1118 goto out;
1113 } 1119 }
1120 /* we call try-to-free pages for make this cgroup empty */
1121 lru_add_drain_all();
1114 /* try to free all pages in this cgroup */ 1122 /* try to free all pages in this cgroup */
1115 shrink = 1; 1123 shrink = 1;
1116 while (nr_retries && mem->res.usage > 0) { 1124 while (nr_retries && mem->res.usage > 0) {
1117 int progress; 1125 int progress;
1126
1127 if (signal_pending(current)) {
1128 ret = -EINTR;
1129 goto out;
1130 }
1118 progress = try_to_free_mem_cgroup_pages(mem, 1131 progress = try_to_free_mem_cgroup_pages(mem,
1119 GFP_HIGHUSER_MOVABLE); 1132 GFP_HIGHUSER_MOVABLE);
1120 if (!progress) 1133 if (!progress) {
1121 nr_retries--; 1134 nr_retries--;
1135 /* maybe some writeback is necessary */
1136 congestion_wait(WRITE, HZ/10);
1137 }
1122 1138
1123 } 1139 }
1124 /* try move_account...there may be some *locked* pages. */ 1140 /* try move_account...there may be some *locked* pages. */
@@ -1128,6 +1144,12 @@ try_to_free:
1128 goto out; 1144 goto out;
1129} 1145}
1130 1146
1147int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
1148{
1149 return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true);
1150}
1151
1152
1131static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) 1153static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
1132{ 1154{
1133 return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, 1155 return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res,
@@ -1225,6 +1247,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft,
1225 return 0; 1247 return 0;
1226} 1248}
1227 1249
1250
1228static struct cftype mem_cgroup_files[] = { 1251static struct cftype mem_cgroup_files[] = {
1229 { 1252 {
1230 .name = "usage_in_bytes", 1253 .name = "usage_in_bytes",
@@ -1253,6 +1276,10 @@ static struct cftype mem_cgroup_files[] = {
1253 .name = "stat", 1276 .name = "stat",
1254 .read_map = mem_control_stat_show, 1277 .read_map = mem_control_stat_show,
1255 }, 1278 },
1279 {
1280 .name = "force_empty",
1281 .trigger = mem_cgroup_force_empty_write,
1282 },
1256}; 1283};
1257 1284
1258static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) 1285static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node)
@@ -1350,7 +1377,7 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
1350 struct cgroup *cont) 1377 struct cgroup *cont)
1351{ 1378{
1352 struct mem_cgroup *mem = mem_cgroup_from_cont(cont); 1379 struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
1353 mem_cgroup_force_empty(mem); 1380 mem_cgroup_force_empty(mem, false);
1354} 1381}
1355 1382
1356static void mem_cgroup_destroy(struct cgroup_subsys *ss, 1383static void mem_cgroup_destroy(struct cgroup_subsys *ss,