diff options
-rw-r--r-- | Documentation/controllers/memory.txt | 27 | ||||
-rw-r--r-- | mm/memcontrol.c | 41 |
2 files changed, 57 insertions, 11 deletions
diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt index 58f32c166fac..54253b7a8db2 100644 --- a/Documentation/controllers/memory.txt +++ b/Documentation/controllers/memory.txt | |||
@@ -237,11 +237,30 @@ reclaimed. | |||
237 | A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a | 237 | A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a |
238 | cgroup might have some charge associated with it, even though all | 238 | cgroup might have some charge associated with it, even though all |
239 | tasks have migrated away from it. | 239 | tasks have migrated away from it. |
240 | Such charges are moved to its parent as much as possible and freed if parent | 240 | Such charges are freed (by default) or moved to its parent. When moved, |
241 | is full. Both of RSS and CACHES are moved to parent. | 241 | both RSS and CACHES are moved to the parent. |
242 | If both of them are busy, rmdir() returns -EBUSY. | 242 | If both of them are busy, rmdir() returns -EBUSY. See also 5.1. |
243 | 243 | ||
244 | 5. TODO | 244 | 5. Misc. interfaces. |
245 | |||
246 | 5.1 force_empty | ||
247 | The memory.force_empty interface is provided to make a cgroup's memory usage empty. | |
248 | You can use this interface only when the cgroup has no tasks. | |
249 | When anything is written to this file, e.g. | |
250 | |||
251 | # echo 0 > memory.force_empty | ||
252 | |||
253 | almost all pages tracked by this memcg will be unmapped and freed. Some | |
254 | pages cannot be freed because they are locked or in use. Such pages are moved | |
255 | to the parent and this cgroup will be empty. However, this may return -EBUSY | |
256 | if the cgroup is too busy. | |
257 | |||
258 | The typical use case of this interface is to call it before rmdir(). | |
259 | Because rmdir() moves all pages to the parent, some out-of-use page caches can be | |
260 | moved to the parent. If you want to avoid that, force_empty will be useful. | |
261 | |||
262 | |||
263 | 6. TODO | ||
245 | 264 | ||
246 | 1. Add support for accounting huge pages (as a separate controller) | 265 | 1. Add support for accounting huge pages (as a separate controller) |
247 | 2. Make per-cgroup scanner reclaim not-shared pages first | 266 | 2. Make per-cgroup scanner reclaim not-shared pages first |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e00f25e6545f..decace3bb57e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1062,21 +1062,27 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, | |||
1062 | * make mem_cgroup's charge to be 0 if there is no task. | 1062 | * make mem_cgroup's charge to be 0 if there is no task. |
1063 | * This enables deleting this mem_cgroup. | 1063 | * This enables deleting this mem_cgroup. |
1064 | */ | 1064 | */ |
1065 | static int mem_cgroup_force_empty(struct mem_cgroup *mem) | 1065 | static int mem_cgroup_force_empty(struct mem_cgroup *mem, bool free_all) |
1066 | { | 1066 | { |
1067 | int ret; | 1067 | int ret; |
1068 | int node, zid, shrink; | 1068 | int node, zid, shrink; |
1069 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; | 1069 | int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; |
1070 | struct cgroup *cgrp = mem->css.cgroup; | ||
1070 | 1071 | ||
1071 | css_get(&mem->css); | 1072 | css_get(&mem->css); |
1072 | 1073 | ||
1073 | shrink = 0; | 1074 | shrink = 0; |
1075 | /* should free all ? */ | ||
1076 | if (free_all) | ||
1077 | goto try_to_free; | ||
1074 | move_account: | 1078 | move_account: |
1075 | while (mem->res.usage > 0) { | 1079 | while (mem->res.usage > 0) { |
1076 | ret = -EBUSY; | 1080 | ret = -EBUSY; |
1077 | if (atomic_read(&mem->css.cgroup->count) > 0) | 1081 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children)) |
1082 | goto out; | ||
1083 | ret = -EINTR; | ||
1084 | if (signal_pending(current)) | ||
1078 | goto out; | 1085 | goto out; |
1079 | |||
1080 | /* This is for making all *used* pages to be on LRU. */ | 1086 | /* This is for making all *used* pages to be on LRU. */ |
1081 | lru_add_drain_all(); | 1087 | lru_add_drain_all(); |
1082 | ret = 0; | 1088 | ret = 0; |
@@ -1106,19 +1112,29 @@ out: | |||
1106 | return ret; | 1112 | return ret; |
1107 | 1113 | ||
1108 | try_to_free: | 1114 | try_to_free: |
1109 | /* returns EBUSY if we come here twice. */ | 1115 | /* returns EBUSY if there is a task or if we come here twice. */ |
1110 | if (shrink) { | 1116 | if (cgroup_task_count(cgrp) || !list_empty(&cgrp->children) || shrink) { |
1111 | ret = -EBUSY; | 1117 | ret = -EBUSY; |
1112 | goto out; | 1118 | goto out; |
1113 | } | 1119 | } |
1120 | /* we call try-to-free pages to make this cgroup empty */ | |
1121 | lru_add_drain_all(); | ||
1114 | /* try to free all pages in this cgroup */ | 1122 | /* try to free all pages in this cgroup */ |
1115 | shrink = 1; | 1123 | shrink = 1; |
1116 | while (nr_retries && mem->res.usage > 0) { | 1124 | while (nr_retries && mem->res.usage > 0) { |
1117 | int progress; | 1125 | int progress; |
1126 | |||
1127 | if (signal_pending(current)) { | ||
1128 | ret = -EINTR; | ||
1129 | goto out; | ||
1130 | } | ||
1118 | progress = try_to_free_mem_cgroup_pages(mem, | 1131 | progress = try_to_free_mem_cgroup_pages(mem, |
1119 | GFP_HIGHUSER_MOVABLE); | 1132 | GFP_HIGHUSER_MOVABLE); |
1120 | if (!progress) | 1133 | if (!progress) { |
1121 | nr_retries--; | 1134 | nr_retries--; |
1135 | /* maybe some writeback is necessary */ | ||
1136 | congestion_wait(WRITE, HZ/10); | ||
1137 | } | ||
1122 | 1138 | ||
1123 | } | 1139 | } |
1124 | /* try move_account...there may be some *locked* pages. */ | 1140 | /* try move_account...there may be some *locked* pages. */ |
@@ -1128,6 +1144,12 @@ try_to_free: | |||
1128 | goto out; | 1144 | goto out; |
1129 | } | 1145 | } |
1130 | 1146 | ||
1147 | int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event) | ||
1148 | { | ||
1149 | return mem_cgroup_force_empty(mem_cgroup_from_cont(cont), true); | ||
1150 | } | ||
1151 | |||
1152 | |||
1131 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) | 1153 | static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) |
1132 | { | 1154 | { |
1133 | return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, | 1155 | return res_counter_read_u64(&mem_cgroup_from_cont(cont)->res, |
@@ -1225,6 +1247,7 @@ static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, | |||
1225 | return 0; | 1247 | return 0; |
1226 | } | 1248 | } |
1227 | 1249 | ||
1250 | |||
1228 | static struct cftype mem_cgroup_files[] = { | 1251 | static struct cftype mem_cgroup_files[] = { |
1229 | { | 1252 | { |
1230 | .name = "usage_in_bytes", | 1253 | .name = "usage_in_bytes", |
@@ -1253,6 +1276,10 @@ static struct cftype mem_cgroup_files[] = { | |||
1253 | .name = "stat", | 1276 | .name = "stat", |
1254 | .read_map = mem_control_stat_show, | 1277 | .read_map = mem_control_stat_show, |
1255 | }, | 1278 | }, |
1279 | { | ||
1280 | .name = "force_empty", | ||
1281 | .trigger = mem_cgroup_force_empty_write, | ||
1282 | }, | ||
1256 | }; | 1283 | }; |
1257 | 1284 | ||
1258 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) | 1285 | static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) |
@@ -1350,7 +1377,7 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, | |||
1350 | struct cgroup *cont) | 1377 | struct cgroup *cont) |
1351 | { | 1378 | { |
1352 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); | 1379 | struct mem_cgroup *mem = mem_cgroup_from_cont(cont); |
1353 | mem_cgroup_force_empty(mem); | 1380 | mem_cgroup_force_empty(mem, false); |
1354 | } | 1381 | } |
1355 | 1382 | ||
1356 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, | 1383 | static void mem_cgroup_destroy(struct cgroup_subsys *ss, |