author     Tejun Heo <tj@kernel.org>   2015-11-03 17:29:03 -0500
committer  Tejun Heo <tj@kernel.org>   2015-11-03 17:29:03 -0500
commit     159b5bb46492e4dcef2070b12861030bc360402b (patch)
tree       93de7d6e94a059aade50ee5437de6a50ccd1cf7b /mm/memcontrol.c
parent     56e74338a535cbcc2f2da08b1ea1a92920194364 (diff)
parent     469eabb3aec03d9defed3462df743a223a5c8f54 (diff)

Merge branch 'for-4.3-fixes' into for-4.4

Diffstat (limited to 'mm/memcontrol.c')

 -rw-r--r--   mm/memcontrol.c   470
 1 file changed, 85 insertions, 385 deletions

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1af057575ce9..6ddaeba34e09 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -111,56 +111,10 @@ static const char * const mem_cgroup_lru_names[] = {
         "unevictable",
 };
 
-/*
- * Per memcg event counter is incremented at every pagein/pageout. With THP,
- * it will be incremated by the number of pages. This counter is used for
- * for trigger some periodic events. This is straightforward and better
- * than using jiffies etc. to handle periodic memcg event.
- */
-enum mem_cgroup_events_target {
-        MEM_CGROUP_TARGET_THRESH,
-        MEM_CGROUP_TARGET_SOFTLIMIT,
-        MEM_CGROUP_TARGET_NUMAINFO,
-        MEM_CGROUP_NTARGETS,
-};
 #define THRESHOLDS_EVENTS_TARGET 128
 #define SOFTLIMIT_EVENTS_TARGET 1024
 #define NUMAINFO_EVENTS_TARGET 1024
 
-struct mem_cgroup_stat_cpu {
-        long count[MEM_CGROUP_STAT_NSTATS];
-        unsigned long events[MEMCG_NR_EVENTS];
-        unsigned long nr_page_events;
-        unsigned long targets[MEM_CGROUP_NTARGETS];
-};
-
-struct reclaim_iter {
-        struct mem_cgroup *position;
-        /* scan generation, increased every round-trip */
-        unsigned int generation;
-};
-
-/*
- * per-zone information in memory controller.
- */
-struct mem_cgroup_per_zone {
-        struct lruvec           lruvec;
-        unsigned long           lru_size[NR_LRU_LISTS];
-
-        struct reclaim_iter     iter[DEF_PRIORITY + 1];
-
-        struct rb_node          tree_node;      /* RB tree node */
-        unsigned long           usage_in_excess;/* Set to the value by which */
-                                                /* the soft limit is exceeded*/
-        bool                    on_tree;
-        struct mem_cgroup       *memcg;         /* Back pointer, we cannot */
-                                                /* use container_of */
-};
-
-struct mem_cgroup_per_node {
-        struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
-};
-
 /*
  * Cgroups above their limits are maintained in a RB-Tree, independent of
  * their hierarchy representation
@@ -181,32 +135,6 @@ struct mem_cgroup_tree {
 
 static struct mem_cgroup_tree soft_limit_tree __read_mostly;
 
-struct mem_cgroup_threshold {
-        struct eventfd_ctx *eventfd;
-        unsigned long threshold;
-};
-
-/* For threshold */
-struct mem_cgroup_threshold_ary {
-        /* An array index points to threshold just below or equal to usage. */
-        int current_threshold;
-        /* Size of entries[] */
-        unsigned int size;
-        /* Array of thresholds */
-        struct mem_cgroup_threshold entries[0];
-};
-
-struct mem_cgroup_thresholds {
-        /* Primary thresholds array */
-        struct mem_cgroup_threshold_ary *primary;
-        /*
-         * Spare threshold array.
-         * This is needed to make mem_cgroup_unregister_event() "never fail".
-         * It must be able to store at least primary->size - 1 entries.
-         */
-        struct mem_cgroup_threshold_ary *spare;
-};
-
 /* for OOM */
 struct mem_cgroup_eventfd_list {
         struct list_head list;
@@ -256,113 +184,6 @@ struct mem_cgroup_event {
 static void mem_cgroup_threshold(struct mem_cgroup *memcg);
 static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 
-/*
- * The memory controller data structure. The memory controller controls both
- * page cache and RSS per cgroup. We would eventually like to provide
- * statistics based on the statistics developed by Rik Van Riel for clock-pro,
- * to help the administrator determine what knobs to tune.
- */
-struct mem_cgroup {
-        struct cgroup_subsys_state css;
-
-        /* Accounted resources */
-        struct page_counter memory;
-        struct page_counter memsw;
-        struct page_counter kmem;
-
-        /* Normal memory consumption range */
-        unsigned long low;
-        unsigned long high;
-
-        unsigned long soft_limit;
-
-        /* vmpressure notifications */
-        struct vmpressure vmpressure;
-
-        /* css_online() has been completed */
-        int initialized;
-
-        /*
-         * Should the accounting and control be hierarchical, per subtree?
-         */
-        bool use_hierarchy;
-
-        /* protected by memcg_oom_lock */
-        bool oom_lock;
-        int under_oom;
-
-        int swappiness;
-        /* OOM-Killer disable */
-        int oom_kill_disable;
-
-        /* protect arrays of thresholds */
-        struct mutex thresholds_lock;
-
-        /* thresholds for memory usage. RCU-protected */
-        struct mem_cgroup_thresholds thresholds;
-
-        /* thresholds for mem+swap usage. RCU-protected */
-        struct mem_cgroup_thresholds memsw_thresholds;
-
-        /* For oom notifier event fd */
-        struct list_head oom_notify;
-
-        /*
-         * Should we move charges of a task when a task is moved into this
-         * mem_cgroup ? And what type of charges should we move ?
-         */
-        unsigned long move_charge_at_immigrate;
-        /*
-         * set > 0 if pages under this cgroup are moving to other cgroup.
-         */
-        atomic_t moving_account;
-        /* taken only while moving_account > 0 */
-        spinlock_t move_lock;
-        struct task_struct *move_lock_task;
-        unsigned long move_lock_flags;
-        /*
-         * percpu counter.
-         */
-        struct mem_cgroup_stat_cpu __percpu *stat;
-        spinlock_t pcp_counter_lock;
-
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
-        struct cg_proto tcp_mem;
-#endif
-#if defined(CONFIG_MEMCG_KMEM)
-        /* Index in the kmem_cache->memcg_params.memcg_caches array */
-        int kmemcg_id;
-        bool kmem_acct_activated;
-        bool kmem_acct_active;
-#endif
-
-        int last_scanned_node;
-#if MAX_NUMNODES > 1
-        nodemask_t scan_nodes;
-        atomic_t numainfo_events;
-        atomic_t numainfo_updating;
-#endif
-
-#ifdef CONFIG_CGROUP_WRITEBACK
-        struct list_head cgwb_list;
-        struct wb_domain cgwb_domain;
-#endif
-
-        /* List of events which userspace want to receive */
-        struct list_head event_list;
-        spinlock_t event_list_lock;
-
-        struct mem_cgroup_per_node *nodeinfo[0];
-        /* WARNING: nodeinfo must be the last member here */
-};
-
-#ifdef CONFIG_MEMCG_KMEM
-bool memcg_kmem_is_active(struct mem_cgroup *memcg)
-{
-        return memcg->kmem_acct_active;
-}
-#endif
-
 /* Stuffs for move charges at task migration. */
 /*
  * Types of charges to be moved.
@@ -423,11 +244,6 @@ enum res_type {
  */
 static DEFINE_MUTEX(memcg_create_mutex);
 
-struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
-{
-        return s ? container_of(s, struct mem_cgroup, css) : NULL;
-}
-
 /* Some nice accessors for the vmpressure. */
 struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
 {
@@ -499,8 +315,7 @@ void sock_update_memcg(struct sock *sk)
                 rcu_read_lock();
                 memcg = mem_cgroup_from_task(current);
                 cg_proto = sk->sk_prot->proto_cgroup(memcg);
-                if (!mem_cgroup_is_root(memcg) &&
-                    memcg_proto_active(cg_proto) &&
+                if (cg_proto && test_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags) &&
                     css_tryget_online(&memcg->css)) {
                         sk->sk_cgrp = cg_proto;
                 }
@@ -593,11 +408,6 @@ mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
         return &memcg->nodeinfo[nid]->zoneinfo[zid];
 }
 
-struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg)
-{
-        return &memcg->css;
-}
-
 /**
  * mem_cgroup_css_from_page - css of the memcg associated with a page
  * @page: page of interest
@@ -631,6 +441,34 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
         return &memcg->css;
 }
 
+/**
+ * page_cgroup_ino - return inode number of the memcg a page is charged to
+ * @page: the page
+ *
+ * Look up the closest online ancestor of the memory cgroup @page is charged to
+ * and return its inode number or 0 if @page is not charged to any cgroup. It
+ * is safe to call this function without holding a reference to @page.
+ *
+ * Note, this function is inherently racy, because there is nothing to prevent
+ * the cgroup inode from getting torn down and potentially reallocated a moment
+ * after page_cgroup_ino() returns, so it only should be used by callers that
+ * do not care (such as procfs interfaces).
+ */
+ino_t page_cgroup_ino(struct page *page)
+{
+        struct mem_cgroup *memcg;
+        unsigned long ino = 0;
+
+        rcu_read_lock();
+        memcg = READ_ONCE(page->mem_cgroup);
+        while (memcg && !(memcg->css.flags & CSS_ONLINE))
+                memcg = parent_mem_cgroup(memcg);
+        if (memcg)
+                ino = cgroup_ino(memcg->css.cgroup);
+        rcu_read_unlock();
+        return ino;
+}
+
 static struct mem_cgroup_per_zone *
 mem_cgroup_page_zoneinfo(struct mem_cgroup *memcg, struct page *page)
 {
@@ -876,14 +714,6 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
         __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
 }
 
-unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
-{
-        struct mem_cgroup_per_zone *mz;
-
-        mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
-        return mz->lru_size[lru];
-}
-
 static unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
                                                   int nid,
                                                   unsigned int lru_mask)
@@ -986,6 +816,7 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
 
         return mem_cgroup_from_css(task_css(p, memory_cgrp_id));
 }
+EXPORT_SYMBOL(mem_cgroup_from_task);
 
 static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 {
@@ -1031,7 +862,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                                    struct mem_cgroup *prev,
                                    struct mem_cgroup_reclaim_cookie *reclaim)
 {
-        struct reclaim_iter *uninitialized_var(iter);
+        struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
         struct cgroup_subsys_state *css = NULL;
         struct mem_cgroup *memcg = NULL;
         struct mem_cgroup *pos = NULL;
@@ -1173,30 +1004,6 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
              iter != NULL;                              \
              iter = mem_cgroup_iter(NULL, iter, NULL))
 
-void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx)
-{
-        struct mem_cgroup *memcg;
-
-        rcu_read_lock();
-        memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
-        if (unlikely(!memcg))
-                goto out;
-
-        switch (idx) {
-        case PGFAULT:
-                this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]);
-                break;
-        case PGMAJFAULT:
-                this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]);
-                break;
-        default:
-                BUG();
-        }
-out:
-        rcu_read_unlock();
-}
-EXPORT_SYMBOL(__mem_cgroup_count_vm_event);
-
 /**
  * mem_cgroup_zone_lruvec - get the lru list vector for a zone and memcg
  * @zone: zone of the wanted lruvec
@@ -1295,15 +1102,6 @@ void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
         VM_BUG_ON((long)(*lru_size) < 0);
 }
 
-bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, struct mem_cgroup *root)
-{
-        if (root == memcg)
-                return true;
-        if (!root->use_hierarchy)
-                return false;
-        return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
-}
-
 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
 {
         struct mem_cgroup *task_memcg;
@@ -1330,39 +1128,6 @@ bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
         return ret;
 }
 
-int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec)
-{
-        unsigned long inactive_ratio;
-        unsigned long inactive;
-        unsigned long active;
-        unsigned long gb;
-
-        inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_ANON);
-        active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_ANON);
-
-        gb = (inactive + active) >> (30 - PAGE_SHIFT);
-        if (gb)
-                inactive_ratio = int_sqrt(10 * gb);
-        else
-                inactive_ratio = 1;
-
-        return inactive * inactive_ratio < active;
-}
-
-bool mem_cgroup_lruvec_online(struct lruvec *lruvec)
-{
-        struct mem_cgroup_per_zone *mz;
-        struct mem_cgroup *memcg;
-
-        if (mem_cgroup_disabled())
-                return true;
-
-        mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
-        memcg = mz->memcg;
-
-        return !!(memcg->css.flags & CSS_ONLINE);
-}
-
 #define mem_cgroup_from_counter(counter, member)       \
         container_of(counter, struct mem_cgroup, member)
 
@@ -1394,15 +1159,6 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
         return margin;
 }
 
-int mem_cgroup_swappiness(struct mem_cgroup *memcg)
-{
-        /* root ? */
-        if (mem_cgroup_disabled() || !memcg->css.parent)
-                return vm_swappiness;
-
-        return memcg->swappiness;
-}
-
 /*
  * A routine for checking "mem" is under move_account() or not.
  *
@@ -1545,6 +1301,12 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
 static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
                                      int order)
 {
+        struct oom_control oc = {
+                .zonelist = NULL,
+                .nodemask = NULL,
+                .gfp_mask = gfp_mask,
+                .order = order,
+        };
         struct mem_cgroup *iter;
         unsigned long chosen_points = 0;
         unsigned long totalpages;
@@ -1563,7 +1325,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
                 goto unlock;
         }
 
-        check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, order, NULL, memcg);
+        check_panic_on_oom(&oc, CONSTRAINT_MEMCG, memcg);
         totalpages = mem_cgroup_get_limit(memcg) ? : 1;
         for_each_mem_cgroup_tree(iter, memcg) {
                 struct css_task_iter it;
@@ -1571,8 +1333,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 
                 css_task_iter_start(&iter->css, &it);
                 while ((task = css_task_iter_next(&it))) {
-                        switch (oom_scan_process_thread(task, totalpages, NULL,
-                                                        false)) {
+                        switch (oom_scan_process_thread(&oc, task, totalpages)) {
                         case OOM_SCAN_SELECT:
                                 if (chosen)
                                         put_task_struct(chosen);
@@ -1610,8 +1371,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 
         if (chosen) {
                 points = chosen_points * 1000 / totalpages;
-                oom_kill_process(chosen, gfp_mask, order, points, totalpages,
-                                 memcg, NULL, "Memory cgroup out of memory");
+                oom_kill_process(&oc, chosen, points, totalpages, memcg,
+                                 "Memory cgroup out of memory");
         }
 unlock:
         mutex_unlock(&oom_lock);
@@ -2062,23 +1823,6 @@ void mem_cgroup_end_page_stat(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(mem_cgroup_end_page_stat);
 
-/**
- * mem_cgroup_update_page_stat - update page state statistics
- * @memcg: memcg to account against
- * @idx: page state item to account
- * @val: number of pages (positive or negative)
- *
- * See mem_cgroup_begin_page_stat() for locking requirements.
- */
-void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
-                                 enum mem_cgroup_stat_index idx, int val)
-{
-        VM_BUG_ON(!rcu_read_lock_held());
-
-        if (memcg)
-                this_cpu_add(memcg->stat->count[idx], val);
-}
-
 /*
  * size of first charge trial. "32" comes from vmscan.c's magic value.
  * TODO: maybe necessary to use big numbers in big irons.
@@ -2355,40 +2099,6 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
         css_put_many(&memcg->css, nr_pages);
 }
 
-/*
- * try_get_mem_cgroup_from_page - look up page's memcg association
- * @page: the page
- *
- * Look up, get a css reference, and return the memcg that owns @page.
- *
- * The page must be locked to prevent racing with swap-in and page
- * cache charges. If coming from an unlocked page table, the caller
- * must ensure the page is on the LRU or this can race with charging.
- */
-struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
-{
-        struct mem_cgroup *memcg;
-        unsigned short id;
-        swp_entry_t ent;
-
-        VM_BUG_ON_PAGE(!PageLocked(page), page);
-
-        memcg = page->mem_cgroup;
-        if (memcg) {
-                if (!css_tryget_online(&memcg->css))
-                        memcg = NULL;
-        } else if (PageSwapCache(page)) {
-                ent.val = page_private(page);
-                id = lookup_swap_cgroup_id(ent);
-                rcu_read_lock();
-                memcg = mem_cgroup_from_id(id);
-                if (memcg && !css_tryget_online(&memcg->css))
-                        memcg = NULL;
-                rcu_read_unlock();
-        }
-        return memcg;
-}
-
 static void lock_page_lru(struct page *page, int *isolated)
 {
         struct zone *zone = page_zone(page);
@@ -2504,16 +2214,6 @@ void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages)
         css_put_many(&memcg->css, nr_pages);
 }
 
-/*
- * helper for acessing a memcg's index. It will be used as an index in the
- * child cache array in kmem_cache, and also to derive its name. This function
- * will return -1 when this is not a kmem-limited memcg.
- */
-int memcg_cache_id(struct mem_cgroup *memcg)
-{
-        return memcg ? memcg->kmemcg_id : -1;
-}
-
 static int memcg_alloc_cache_id(void)
 {
         int id, size;
@@ -5127,10 +4827,12 @@ static void mem_cgroup_clear_mc(void)
 static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
                                  struct cgroup_taskset *tset)
 {
-        struct task_struct *p = cgroup_taskset_first(tset);
-        int ret = 0;
         struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+        struct mem_cgroup *from;
+        struct task_struct *p;
+        struct mm_struct *mm;
         unsigned long move_flags;
+        int ret = 0;
 
         /*
          * We are now commited to this value whatever it is. Changes in this
@@ -5138,36 +4840,37 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
          * So we need to save it, and keep it going.
          */
         move_flags = READ_ONCE(memcg->move_charge_at_immigrate);
-        if (move_flags) {
-                struct mm_struct *mm;
-                struct mem_cgroup *from = mem_cgroup_from_task(p);
+        if (!move_flags)
+                return 0;
 
-                VM_BUG_ON(from == memcg);
+        p = cgroup_taskset_first(tset);
+        from = mem_cgroup_from_task(p);
 
-                mm = get_task_mm(p);
-                if (!mm)
-                        return 0;
-                /* We move charges only when we move a owner of the mm */
-                if (mm->owner == p) {
-                        VM_BUG_ON(mc.from);
-                        VM_BUG_ON(mc.to);
-                        VM_BUG_ON(mc.precharge);
-                        VM_BUG_ON(mc.moved_charge);
-                        VM_BUG_ON(mc.moved_swap);
-
-                        spin_lock(&mc.lock);
-                        mc.from = from;
-                        mc.to = memcg;
-                        mc.flags = move_flags;
-                        spin_unlock(&mc.lock);
-                        /* We set mc.moving_task later */
-
-                        ret = mem_cgroup_precharge_mc(mm);
-                        if (ret)
-                                mem_cgroup_clear_mc();
-                }
-                mmput(mm);
+        VM_BUG_ON(from == memcg);
+
+        mm = get_task_mm(p);
+        if (!mm)
+                return 0;
+        /* We move charges only when we move a owner of the mm */
+        if (mm->owner == p) {
+                VM_BUG_ON(mc.from);
+                VM_BUG_ON(mc.to);
+                VM_BUG_ON(mc.precharge);
+                VM_BUG_ON(mc.moved_charge);
+                VM_BUG_ON(mc.moved_swap);
+
+                spin_lock(&mc.lock);
+                mc.from = from;
+                mc.to = memcg;
+                mc.flags = move_flags;
+                spin_unlock(&mc.lock);
+                /* We set mc.moving_task later */
+
+                ret = mem_cgroup_precharge_mc(mm);
+                if (ret)
+                        mem_cgroup_clear_mc();
         }
+        mmput(mm);
         return ret;
 }
 
@@ -5521,19 +5224,6 @@ struct cgroup_subsys memory_cgrp_subsys = {
 };
 
 /**
- * mem_cgroup_events - count memory events against a cgroup
- * @memcg: the memory cgroup
- * @idx: the event index
- * @nr: the number of events to account for
- */
-void mem_cgroup_events(struct mem_cgroup *memcg,
-                       enum mem_cgroup_events_index idx,
-                       unsigned int nr)
-{
-        this_cpu_add(memcg->stat->events[idx], nr);
-}
-
-/**
  * mem_cgroup_low - check if memory consumption is below the normal range
  * @root: the highest ancestor to consider
  * @memcg: the memory cgroup to check
@@ -5605,8 +5295,20 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                  * the page lock, which serializes swap cache removal, which
                  * in turn serializes uncharging.
                  */
+                VM_BUG_ON_PAGE(!PageLocked(page), page);
                 if (page->mem_cgroup)
                         goto out;
+
+                if (do_swap_account) {
+                        swp_entry_t ent = { .val = page_private(page), };
+                        unsigned short id = lookup_swap_cgroup_id(ent);
+
+                        rcu_read_lock();
+                        memcg = mem_cgroup_from_id(id);
+                        if (memcg && !css_tryget_online(&memcg->css))
+                                memcg = NULL;
+                        rcu_read_unlock();
+                }
         }
 
         if (PageTransHuge(page)) {
@@ -5614,8 +5316,6 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
         }
 
-        if (do_swap_account && PageSwapCache(page))
-                memcg = try_get_mem_cgroup_from_page(page);
         if (!memcg)
                 memcg = get_mem_cgroup_from_mm(mm);
 