author    | Jiri Kosina <jkosina@suse.cz> | 2010-12-10 09:19:18 -0500
committer | Jiri Kosina <jkosina@suse.cz> | 2010-12-10 09:19:18 -0500
commit    | 2ade0c1d9d93b7642212657ef76f4a1e30233711
tree      | 63bc720c0ffe5f4760cac4ed617b9870b050175e /mm/memcontrol.c
parent    | 504499f22c08a03e2e19dc88d31aa0ecd2ac815e
parent    | 6313e3c21743cc88bb5bd8aa72948ee1e83937b6
Merge branch 'master' into upstream
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 488 |
1 file changed, 351 insertions, 137 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9be3cf8a5da4..7a22b4129211 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -61,7 +61,14 @@ struct mem_cgroup *root_mem_cgroup __read_mostly; | |||
61 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 61 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
62 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ | 62 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ |
63 | int do_swap_account __read_mostly; | 63 | int do_swap_account __read_mostly; |
64 | static int really_do_swap_account __initdata = 1; /* for remember boot option*/ | 64 | |
65 | /* to remember the boot option */ | ||
66 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED | ||
67 | static int really_do_swap_account __initdata = 1; | ||
68 | #else | ||
69 | static int really_do_swap_account __initdata = 0; | ||
70 | #endif | ||
71 | |||
65 | #else | 72 | #else |
66 | #define do_swap_account (0) | 73 | #define do_swap_account (0) |
67 | #endif | 74 | #endif |
@@ -89,7 +96,10 @@ enum mem_cgroup_stat_index { | |||
89 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ | 96 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ |
90 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ | 97 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ |
91 | MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ | 98 | MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ |
92 | MEM_CGROUP_EVENTS, /* incremented at every pagein/pageout */ | 99 | MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */ |
100 | /* incremented at every pagein/pageout */ | ||
101 | MEM_CGROUP_EVENTS = MEM_CGROUP_STAT_DATA, | ||
102 | MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */ | ||
93 | 103 | ||
94 | MEM_CGROUP_STAT_NSTATS, | 104 | MEM_CGROUP_STAT_NSTATS, |
95 | }; | 105 | }; |
@@ -254,6 +264,12 @@ struct mem_cgroup { | |||
254 | * percpu counter. | 264 | * percpu counter. |
255 | */ | 265 | */ |
256 | struct mem_cgroup_stat_cpu *stat; | 266 | struct mem_cgroup_stat_cpu *stat; |
267 | /* | ||
268 | * used when a cpu is offlined or other synchronizations | ||
269 | * See mem_cgroup_read_stat(). | ||
270 | */ | ||
271 | struct mem_cgroup_stat_cpu nocpu_base; | ||
272 | spinlock_t pcp_counter_lock; | ||
257 | }; | 273 | }; |
258 | 274 | ||
259 | /* Stuffs for move charges at task migration. */ | 275 | /* Stuffs for move charges at task migration. */ |
@@ -269,13 +285,14 @@ enum move_type { | |||
269 | 285 | ||
270 | /* "mc" and its members are protected by cgroup_mutex */ | 286 | /* "mc" and its members are protected by cgroup_mutex */ |
271 | static struct move_charge_struct { | 287 | static struct move_charge_struct { |
272 | spinlock_t lock; /* for from, to, moving_task */ | 288 | spinlock_t lock; /* for from, to */ |
273 | struct mem_cgroup *from; | 289 | struct mem_cgroup *from; |
274 | struct mem_cgroup *to; | 290 | struct mem_cgroup *to; |
275 | unsigned long precharge; | 291 | unsigned long precharge; |
276 | unsigned long moved_charge; | 292 | unsigned long moved_charge; |
277 | unsigned long moved_swap; | 293 | unsigned long moved_swap; |
278 | struct task_struct *moving_task; /* a task moving charges */ | 294 | struct task_struct *moving_task; /* a task moving charges */ |
295 | struct mm_struct *mm; | ||
279 | wait_queue_head_t waitq; /* a waitq for other context */ | 296 | wait_queue_head_t waitq; /* a waitq for other context */ |
280 | } mc = { | 297 | } mc = { |
281 | .lock = __SPIN_LOCK_UNLOCKED(mc.lock), | 298 | .lock = __SPIN_LOCK_UNLOCKED(mc.lock), |
@@ -530,14 +547,40 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | |||
530 | return mz; | 547 | return mz; |
531 | } | 548 | } |
532 | 549 | ||
550 | /* | ||
551 | * Implementation Note: reading percpu statistics for memcg. | ||
552 | * | ||
553 | * Both vmstat[] and percpu_counter use thresholds and periodic | ||
554 | * synchronization to implement a "quick" read. There is a trade-off between | ||
555 | * the cost of reading and the precision of the value, so we may eventually | ||
556 | * implement periodic synchronization of memcg's counters as well. | ||
557 | * | ||
558 | * But this _read() function is currently used for the user interface. Users | ||
559 | * account memory usage per memory cgroup and _always_ require an exact value | ||
560 | * for that accounting. Even if we provided a quick-and-fuzzy read, we would | ||
561 | * still have to visit all online cpus and compute the sum. So, for now, the | ||
562 | * extra synchronization is not implemented (except for cpu hotplug). | ||
563 | * | ||
564 | * If kernel-internal users appear that can tolerate a not-exact value, and | ||
565 | * reading all cpu values becomes a performance bottleneck in some common | ||
566 | * workload, thresholds and synchronization as in vmstat[] should be | ||
567 | * implemented. | ||
568 | */ | ||
533 | static s64 mem_cgroup_read_stat(struct mem_cgroup *mem, | 569 | static s64 mem_cgroup_read_stat(struct mem_cgroup *mem, |
534 | enum mem_cgroup_stat_index idx) | 570 | enum mem_cgroup_stat_index idx) |
535 | { | 571 | { |
536 | int cpu; | 572 | int cpu; |
537 | s64 val = 0; | 573 | s64 val = 0; |
538 | 574 | ||
539 | for_each_possible_cpu(cpu) | 575 | get_online_cpus(); |
576 | for_each_online_cpu(cpu) | ||
540 | val += per_cpu(mem->stat->count[idx], cpu); | 577 | val += per_cpu(mem->stat->count[idx], cpu); |
578 | #ifdef CONFIG_HOTPLUG_CPU | ||
579 | spin_lock(&mem->pcp_counter_lock); | ||
580 | val += mem->nocpu_base.count[idx]; | ||
581 | spin_unlock(&mem->pcp_counter_lock); | ||
582 | #endif | ||
583 | put_online_cpus(); | ||
541 | return val; | 584 | return val; |
542 | } | 585 | } |
543 | 586 | ||
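The hunk above is the reader side of the new scheme: sum the counters of the CPUs that are currently online, then fold in nocpu_base, which holds whatever was drained from CPUs that went offline. A minimal user-space sketch of that pattern, with hypothetical types standing in for the kernel's per-cpu machinery:

```c
#include <stdint.h>
#include <stdio.h>

#define NCPUS 4

/* Hypothetical stand-in for the memcg per-cpu statistics. */
struct counter {
    int64_t percpu[NCPUS];   /* one slot per CPU              */
    int     online[NCPUS];   /* 1 if the CPU is online        */
    int64_t nocpu_base;      /* values drained from dead CPUs */
};

/* Mirrors mem_cgroup_drain_pcp_counter(): move a dead CPU's
 * contribution into the shared base so it is not lost. */
static void drain_cpu(struct counter *c, int cpu)
{
    c->nocpu_base += c->percpu[cpu];
    c->percpu[cpu] = 0;
    c->online[cpu] = 0;
}

/* Mirrors mem_cgroup_read_stat(): online CPUs plus the base. */
static int64_t read_stat(const struct counter *c)
{
    int64_t val = c->nocpu_base;
    for (int cpu = 0; cpu < NCPUS; cpu++)
        if (c->online[cpu])
            val += c->percpu[cpu];
    return val;
}

int main(void)
{
    struct counter c = { .percpu = { 10, 20, 30, 40 },
                         .online = { 1, 1, 1, 1 } };
    drain_cpu(&c, 2);                              /* CPU 2 goes away */
    printf("%lld\n", (long long)read_stat(&c));    /* still 100       */
    return 0;
}
```

The invariant is that a dead CPU's contribution is never lost between drain and read; in the kernel the same is guaranteed by taking pcp_counter_lock on both sides.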
@@ -659,40 +702,83 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) | |||
659 | return mem; | 702 | return mem; |
660 | } | 703 | } |
661 | 704 | ||
662 | /* | 705 | /* The caller has to guarantee "mem" exists before calling this */ |
663 | * Call callback function against all cgroup under hierarchy tree. | 706 | static struct mem_cgroup *mem_cgroup_start_loop(struct mem_cgroup *mem) |
664 | */ | ||
665 | static int mem_cgroup_walk_tree(struct mem_cgroup *root, void *data, | ||
666 | int (*func)(struct mem_cgroup *, void *)) | ||
667 | { | 707 | { |
668 | int found, ret, nextid; | ||
669 | struct cgroup_subsys_state *css; | 708 | struct cgroup_subsys_state *css; |
670 | struct mem_cgroup *mem; | 709 | int found; |
671 | |||
672 | if (!root->use_hierarchy) | ||
673 | return (*func)(root, data); | ||
674 | 710 | ||
675 | nextid = 1; | 711 | if (!mem) /* ROOT cgroup has the smallest ID */ |
676 | do { | 712 | return root_mem_cgroup; /*css_put/get against root is ignored*/ |
677 | ret = 0; | 713 | if (!mem->use_hierarchy) { |
714 | if (css_tryget(&mem->css)) | ||
715 | return mem; | ||
716 | return NULL; | ||
717 | } | ||
718 | rcu_read_lock(); | ||
719 | /* | ||
720 | * searching a memory cgroup which has the smallest ID under given | ||
721 | * ROOT cgroup. (ID >= 1) | ||
722 | */ | ||
723 | css = css_get_next(&mem_cgroup_subsys, 1, &mem->css, &found); | ||
724 | if (css && css_tryget(css)) | ||
725 | mem = container_of(css, struct mem_cgroup, css); | ||
726 | else | ||
678 | mem = NULL; | 727 | mem = NULL; |
728 | rcu_read_unlock(); | ||
729 | return mem; | ||
730 | } | ||
731 | |||
732 | static struct mem_cgroup *mem_cgroup_get_next(struct mem_cgroup *iter, | ||
733 | struct mem_cgroup *root, | ||
734 | bool cond) | ||
735 | { | ||
736 | int nextid = css_id(&iter->css) + 1; | ||
737 | int found; | ||
738 | int hierarchy_used; | ||
739 | struct cgroup_subsys_state *css; | ||
740 | |||
741 | hierarchy_used = iter->use_hierarchy; | ||
742 | |||
743 | css_put(&iter->css); | ||
744 | /* If no ROOT, walk all, ignore hierarchy */ | ||
745 | if (!cond || (root && !hierarchy_used)) | ||
746 | return NULL; | ||
747 | |||
748 | if (!root) | ||
749 | root = root_mem_cgroup; | ||
679 | 750 | ||
751 | do { | ||
752 | iter = NULL; | ||
680 | rcu_read_lock(); | 753 | rcu_read_lock(); |
681 | css = css_get_next(&mem_cgroup_subsys, nextid, &root->css, | 754 | |
682 | &found); | 755 | css = css_get_next(&mem_cgroup_subsys, nextid, |
756 | &root->css, &found); | ||
683 | if (css && css_tryget(css)) | 757 | if (css && css_tryget(css)) |
684 | mem = container_of(css, struct mem_cgroup, css); | 758 | iter = container_of(css, struct mem_cgroup, css); |
685 | rcu_read_unlock(); | 759 | rcu_read_unlock(); |
686 | 760 | /* If css is NULL, no more cgroups will be found */ | |
687 | if (mem) { | ||
688 | ret = (*func)(mem, data); | ||
689 | css_put(&mem->css); | ||
690 | } | ||
691 | nextid = found + 1; | 761 | nextid = found + 1; |
692 | } while (!ret && css); | 762 | } while (css && !iter); |
693 | 763 | ||
694 | return ret; | 764 | return iter; |
695 | } | 765 | } |
766 | /* | ||
767 | * for_each_mem_cgroup_tree() visits every cgroup under the tree. Please | ||
768 | * be careful: breaking out of the loop is not allowed because we hold | ||
769 | * reference counts. Instead, set "cond" to false and "continue" to exit. | ||
770 | */ | ||
771 | #define for_each_mem_cgroup_tree_cond(iter, root, cond) \ | ||
772 | for (iter = mem_cgroup_start_loop(root);\ | ||
773 | iter != NULL;\ | ||
774 | iter = mem_cgroup_get_next(iter, root, cond)) | ||
775 | |||
776 | #define for_each_mem_cgroup_tree(iter, root) \ | ||
777 | for_each_mem_cgroup_tree_cond(iter, root, true) | ||
778 | |||
779 | #define for_each_mem_cgroup_all(iter) \ | ||
780 | for_each_mem_cgroup_tree_cond(iter, NULL, true) | ||
781 | |||
696 | 782 | ||
697 | static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) | 783 | static inline bool mem_cgroup_is_root(struct mem_cgroup *mem) |
698 | { | 784 | { |
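The callback-based mem_cgroup_walk_tree() is gone; callers now loop directly with for_each_mem_cgroup_tree(), built from a start/next pair that keeps a css reference on the current element. A rough user-space sketch of the same iterator shape (hypothetical node type, none of the css machinery):

```c
#include <stddef.h>
#include <stdio.h>

/* Hypothetical node type standing in for struct mem_cgroup. */
struct node {
    int id;
    struct node *next_by_id;   /* emulates css_get_next() ordering */
};

/* start: return the first node of the walk. */
static struct node *walk_start(struct node *root)
{
    return root;
}

/* next: return the node after "iter", or NULL when the walk ends.
 * The kernel version also drops and acquires reference counts here,
 * which is why breaking out of the loop early is not allowed. */
static struct node *walk_next(struct node *iter)
{
    return iter->next_by_id;
}

/* Same shape as for_each_mem_cgroup_tree(iter, root). */
#define for_each_node_tree(iter, root) \
    for ((iter) = walk_start(root); (iter) != NULL; (iter) = walk_next(iter))

int main(void)
{
    struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
    struct node *iter;
    int count = 0;

    for_each_node_tree(iter, &a)     /* replaces the old callback walker */
        count++;
    printf("visited %d nodes\n", count);   /* 3 */
    return 0;
}
```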
@@ -1051,7 +1137,52 @@ static unsigned int get_swappiness(struct mem_cgroup *memcg) | |||
1051 | return swappiness; | 1137 | return swappiness; |
1052 | } | 1138 | } |
1053 | 1139 | ||
1054 | /* A routine for testing mem is not under move_account */ | 1140 | static void mem_cgroup_start_move(struct mem_cgroup *mem) |
1141 | { | ||
1142 | int cpu; | ||
1143 | |||
1144 | get_online_cpus(); | ||
1145 | spin_lock(&mem->pcp_counter_lock); | ||
1146 | for_each_online_cpu(cpu) | ||
1147 | per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) += 1; | ||
1148 | mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] += 1; | ||
1149 | spin_unlock(&mem->pcp_counter_lock); | ||
1150 | put_online_cpus(); | ||
1151 | |||
1152 | synchronize_rcu(); | ||
1153 | } | ||
1154 | |||
1155 | static void mem_cgroup_end_move(struct mem_cgroup *mem) | ||
1156 | { | ||
1157 | int cpu; | ||
1158 | |||
1159 | if (!mem) | ||
1160 | return; | ||
1161 | get_online_cpus(); | ||
1162 | spin_lock(&mem->pcp_counter_lock); | ||
1163 | for_each_online_cpu(cpu) | ||
1164 | per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) -= 1; | ||
1165 | mem->nocpu_base.count[MEM_CGROUP_ON_MOVE] -= 1; | ||
1166 | spin_unlock(&mem->pcp_counter_lock); | ||
1167 | put_online_cpus(); | ||
1168 | } | ||
1169 | /* | ||
1170 | * Two routines for checking whether "mem" is under move_account() or not. | ||
1171 | * | ||
1172 | * mem_cgroup_stealed() - checks whether a cgroup is mc.from. This is used | ||
1173 | * to avoid races in accounting. If true, | ||
1174 | * pc->mem_cgroup may be overwritten. | ||
1175 | * | ||
1176 | * mem_cgroup_under_move() - checks whether a cgroup is mc.from or mc.to, or | ||
1177 | * is under the hierarchy of moving cgroups. This is | ||
1178 | * used for waiting at high memory pressure caused by "move". | ||
1179 | */ | ||
1180 | |||
1181 | static bool mem_cgroup_stealed(struct mem_cgroup *mem) | ||
1182 | { | ||
1183 | VM_BUG_ON(!rcu_read_lock_held()); | ||
1184 | return this_cpu_read(mem->stat->count[MEM_CGROUP_ON_MOVE]) > 0; | ||
1185 | } | ||
1055 | 1186 | ||
1056 | static bool mem_cgroup_under_move(struct mem_cgroup *mem) | 1187 | static bool mem_cgroup_under_move(struct mem_cgroup *mem) |
1057 | { | 1188 | { |
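mem_cgroup_start_move() and mem_cgroup_end_move() raise and lower a per-cpu MEM_CGROUP_ON_MOVE count, so mem_cgroup_stealed() can answer "is a charge move in flight?" with a single this_cpu_read(). A simplified, single-threaded sketch of that protocol (plain arrays instead of per-cpu variables, no RCU):

```c
#include <stdio.h>

#define NCPUS 4

/* Hypothetical per-cpu "a move is in progress" counters. */
static int on_move[NCPUS];

static void start_move(void)            /* cf. mem_cgroup_start_move() */
{
    for (int cpu = 0; cpu < NCPUS; cpu++)
        on_move[cpu] += 1;
    /* the kernel also calls synchronize_rcu() here so every reader
     * is guaranteed to see the raised count before the move proceeds */
}

static void end_move(void)              /* cf. mem_cgroup_end_move() */
{
    for (int cpu = 0; cpu < NCPUS; cpu++)
        on_move[cpu] -= 1;
}

/* cf. mem_cgroup_stealed(): a reader on "cpu" only checks its own slot. */
static int stealed(int cpu)
{
    return on_move[cpu] > 0;
}

int main(void)
{
    printf("before: %d\n", stealed(0));  /* 0: fast, lockless path   */
    start_move();
    printf("during: %d\n", stealed(0));  /* 1: fall back to the lock */
    end_move();
    printf("after:  %d\n", stealed(0));  /* 0 again                  */
    return 0;
}
```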
@@ -1092,13 +1223,6 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *mem) | |||
1092 | return false; | 1223 | return false; |
1093 | } | 1224 | } |
1094 | 1225 | ||
1095 | static int mem_cgroup_count_children_cb(struct mem_cgroup *mem, void *data) | ||
1096 | { | ||
1097 | int *val = data; | ||
1098 | (*val)++; | ||
1099 | return 0; | ||
1100 | } | ||
1101 | |||
1102 | /** | 1226 | /** |
1103 | * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode. | 1227 | * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode. |
1104 | * @memcg: The memory cgroup that went over limit | 1228 | * @memcg: The memory cgroup that went over limit |
@@ -1173,7 +1297,10 @@ done: | |||
1173 | static int mem_cgroup_count_children(struct mem_cgroup *mem) | 1297 | static int mem_cgroup_count_children(struct mem_cgroup *mem) |
1174 | { | 1298 | { |
1175 | int num = 0; | 1299 | int num = 0; |
1176 | mem_cgroup_walk_tree(mem, &num, mem_cgroup_count_children_cb); | 1300 | struct mem_cgroup *iter; |
1301 | |||
1302 | for_each_mem_cgroup_tree(iter, mem) | ||
1303 | num++; | ||
1177 | return num; | 1304 | return num; |
1178 | } | 1305 | } |
1179 | 1306 | ||
@@ -1322,49 +1449,39 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1322 | return total; | 1449 | return total; |
1323 | } | 1450 | } |
1324 | 1451 | ||
1325 | static int mem_cgroup_oom_lock_cb(struct mem_cgroup *mem, void *data) | ||
1326 | { | ||
1327 | int *val = (int *)data; | ||
1328 | int x; | ||
1329 | /* | ||
1330 | * Logically, we can stop scanning immediately when we find | ||
1331 | * a memcg is already locked. But condidering unlock ops and | ||
1332 | * creation/removal of memcg, scan-all is simple operation. | ||
1333 | */ | ||
1334 | x = atomic_inc_return(&mem->oom_lock); | ||
1335 | *val = max(x, *val); | ||
1336 | return 0; | ||
1337 | } | ||
1338 | /* | 1452 | /* |
1339 | * Check OOM-Killer is already running under our hierarchy. | 1453 | * Check OOM-Killer is already running under our hierarchy. |
1340 | * If someone is running, return false. | 1454 | * If someone is running, return false. |
1341 | */ | 1455 | */ |
1342 | static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) | 1456 | static bool mem_cgroup_oom_lock(struct mem_cgroup *mem) |
1343 | { | 1457 | { |
1344 | int lock_count = 0; | 1458 | int x, lock_count = 0; |
1459 | struct mem_cgroup *iter; | ||
1345 | 1460 | ||
1346 | mem_cgroup_walk_tree(mem, &lock_count, mem_cgroup_oom_lock_cb); | 1461 | for_each_mem_cgroup_tree(iter, mem) { |
1462 | x = atomic_inc_return(&iter->oom_lock); | ||
1463 | lock_count = max(x, lock_count); | ||
1464 | } | ||
1347 | 1465 | ||
1348 | if (lock_count == 1) | 1466 | if (lock_count == 1) |
1349 | return true; | 1467 | return true; |
1350 | return false; | 1468 | return false; |
1351 | } | 1469 | } |
1352 | 1470 | ||
1353 | static int mem_cgroup_oom_unlock_cb(struct mem_cgroup *mem, void *data) | 1471 | static int mem_cgroup_oom_unlock(struct mem_cgroup *mem) |
1354 | { | 1472 | { |
1473 | struct mem_cgroup *iter; | ||
1474 | |||
1355 | /* | 1475 | /* |
1356 | * When a new child is created while the hierarchy is under oom, | 1476 | * When a new child is created while the hierarchy is under oom, |
1357 | * mem_cgroup_oom_lock() may not be called. We have to use | 1477 | * mem_cgroup_oom_lock() may not be called. We have to use |
1358 | * atomic_add_unless() here. | 1478 | * atomic_add_unless() here. |
1359 | */ | 1479 | */ |
1360 | atomic_add_unless(&mem->oom_lock, -1, 0); | 1480 | for_each_mem_cgroup_tree(iter, mem) |
1481 | atomic_add_unless(&iter->oom_lock, -1, 0); | ||
1361 | return 0; | 1482 | return 0; |
1362 | } | 1483 | } |
1363 | 1484 | ||
1364 | static void mem_cgroup_oom_unlock(struct mem_cgroup *mem) | ||
1365 | { | ||
1366 | mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_unlock_cb); | ||
1367 | } | ||
1368 | 1485 | ||
1369 | static DEFINE_MUTEX(memcg_oom_mutex); | 1486 | static DEFINE_MUTEX(memcg_oom_mutex); |
1370 | static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq); | 1487 | static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq); |
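mem_cgroup_oom_lock() now increments oom_lock in every cgroup of the subtree and succeeds only when the largest value it sees is 1, meaning nobody else in the hierarchy held the lock; unlock decrements each counter but never below zero. A tiny model of that increment-everywhere, check-the-max test:

```c
#include <stdio.h>

#define NR 3                       /* cgroups in the hierarchy */
static int oom_lock[NR];

/* cf. mem_cgroup_oom_lock(): true only if nobody held it before. */
static int try_oom_lock(void)
{
    int lock_count = 0;
    for (int i = 0; i < NR; i++) {
        int x = ++oom_lock[i];     /* atomic_inc_return() in the kernel */
        if (x > lock_count)
            lock_count = x;
    }
    return lock_count == 1;
}

/* cf. mem_cgroup_oom_unlock(): never drop below zero. */
static void oom_unlock(void)
{
    for (int i = 0; i < NR; i++)
        if (oom_lock[i] > 0)       /* atomic_add_unless(.., -1, 0) */
            oom_lock[i]--;
}

int main(void)
{
    printf("first  lock: %d\n", try_oom_lock());   /* 1 = success      */
    printf("second lock: %d\n", try_oom_lock());   /* 0 = already held */
    oom_unlock();
    oom_unlock();
    return 0;
}
```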
@@ -1462,34 +1579,73 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *mem, gfp_t mask) | |||
1462 | /* | 1579 | /* |
1463 | * Currently used to update mapped file statistics, but the routine can be | 1580 | * Currently used to update mapped file statistics, but the routine can be |
1464 | * generalized to update other statistics as well. | 1581 | * generalized to update other statistics as well. |
1582 | * | ||
1583 | * Notes: Race condition | ||
1584 | * | ||
1585 | * We usually use lock_page_cgroup() for accessing page_cgroup members, but | ||
1586 | * it tends to be costly. Under some conditions, however, we don't need | ||
1587 | * to do so _always_. | ||
1588 | * | ||
1589 | * Considering "charge", lock_page_cgroup() is not required because all | ||
1590 | * file-stat operations happen after a page is attached to the radix-tree. | ||
1591 | * There is no race with "charge". | ||
1592 | * | ||
1593 | * Considering "uncharge", memcg intentionally doesn't clear pc->mem_cgroup | ||
1594 | * at "uncharge". So, we always see a valid pc->mem_cgroup even if we race | ||
1595 | * with "uncharge". The statistics themselves are properly handled | ||
1596 | * by flags. | ||
1597 | * | ||
1598 | * Considering "move", this is the only case where we see a race. To keep the | ||
1599 | * race window small, we check the MEM_CGROUP_ON_MOVE percpu value and detect | ||
1600 | * whether a race is possible. If it is, we take the lock. | ||
1465 | */ | 1601 | */ |
1466 | void mem_cgroup_update_file_mapped(struct page *page, int val) | 1602 | |
1603 | static void mem_cgroup_update_file_stat(struct page *page, int idx, int val) | ||
1467 | { | 1604 | { |
1468 | struct mem_cgroup *mem; | 1605 | struct mem_cgroup *mem; |
1469 | struct page_cgroup *pc; | 1606 | struct page_cgroup *pc = lookup_page_cgroup(page); |
1607 | bool need_unlock = false; | ||
1470 | 1608 | ||
1471 | pc = lookup_page_cgroup(page); | ||
1472 | if (unlikely(!pc)) | 1609 | if (unlikely(!pc)) |
1473 | return; | 1610 | return; |
1474 | 1611 | ||
1475 | lock_page_cgroup(pc); | 1612 | rcu_read_lock(); |
1476 | mem = pc->mem_cgroup; | 1613 | mem = pc->mem_cgroup; |
1477 | if (!mem || !PageCgroupUsed(pc)) | 1614 | if (unlikely(!mem || !PageCgroupUsed(pc))) |
1478 | goto done; | 1615 | goto out; |
1616 | /* pc->mem_cgroup is unstable ? */ | ||
1617 | if (unlikely(mem_cgroup_stealed(mem))) { | ||
1618 | /* take a lock against to access pc->mem_cgroup */ | ||
1619 | lock_page_cgroup(pc); | ||
1620 | need_unlock = true; | ||
1621 | mem = pc->mem_cgroup; | ||
1622 | if (!mem || !PageCgroupUsed(pc)) | ||
1623 | goto out; | ||
1624 | } | ||
1479 | 1625 | ||
1480 | /* | 1626 | this_cpu_add(mem->stat->count[idx], val); |
1481 | * Preemption is already disabled. We can use __this_cpu_xxx | 1627 | |
1482 | */ | 1628 | switch (idx) { |
1483 | if (val > 0) { | 1629 | case MEM_CGROUP_STAT_FILE_MAPPED: |
1484 | __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); | 1630 | if (val > 0) |
1485 | SetPageCgroupFileMapped(pc); | 1631 | SetPageCgroupFileMapped(pc); |
1486 | } else { | 1632 | else if (!page_mapped(page)) |
1487 | __this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); | 1633 | ClearPageCgroupFileMapped(pc); |
1488 | ClearPageCgroupFileMapped(pc); | 1634 | break; |
1635 | default: | ||
1636 | BUG(); | ||
1489 | } | 1637 | } |
1490 | 1638 | ||
1491 | done: | 1639 | out: |
1492 | unlock_page_cgroup(pc); | 1640 | if (unlikely(need_unlock)) |
1641 | unlock_page_cgroup(pc); | ||
1642 | rcu_read_unlock(); | ||
1643 | return; | ||
1644 | } | ||
1645 | |||
1646 | void mem_cgroup_update_file_mapped(struct page *page, int val) | ||
1647 | { | ||
1648 | mem_cgroup_update_file_stat(page, MEM_CGROUP_STAT_FILE_MAPPED, val); | ||
1493 | } | 1649 | } |
1494 | 1650 | ||
1495 | /* | 1651 | /* |
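The rewritten update path is optimistic: it only takes lock_page_cgroup() when mem_cgroup_stealed() reports a racing move, and otherwise updates the counter under rcu_read_lock() alone. A compressed sketch of that lock-only-on-contention shape, using a plain mutex and a flag as stand-ins for the kernel primitives:

```c
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t pc_lock = PTHREAD_MUTEX_INITIALIZER;
static int move_in_progress;     /* stand-in for MEM_CGROUP_ON_MOVE  */
static long file_mapped;         /* stand-in for the per-cpu counter */

static void update_file_mapped(int val)
{
    int need_unlock = 0;

    /* fast path: no account move running, no lock needed */
    if (move_in_progress) {
        /* slow path: a move may rewrite pc->mem_cgroup,
         * so take the lock before touching the counter */
        pthread_mutex_lock(&pc_lock);
        need_unlock = 1;
    }

    file_mapped += val;          /* this_cpu_add() in the kernel */

    if (need_unlock)
        pthread_mutex_unlock(&pc_lock);
}

int main(void)
{
    update_file_mapped(1);       /* lockless       */
    move_in_progress = 1;
    update_file_mapped(1);       /* takes the lock */
    printf("file_mapped = %ld\n", file_mapped);    /* 2 */
    return 0;
}
```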
@@ -1605,15 +1761,55 @@ static void drain_all_stock_sync(void) | |||
1605 | atomic_dec(&memcg_drain_count); | 1761 | atomic_dec(&memcg_drain_count); |
1606 | } | 1762 | } |
1607 | 1763 | ||
1608 | static int __cpuinit memcg_stock_cpu_callback(struct notifier_block *nb, | 1764 | /* |
1765 | * This function drains the percpu counter values from a DEAD cpu and | ||
1766 | * moves them into nocpu_base. Note that this function can be preempted. | ||
1767 | */ | ||
1768 | static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu) | ||
1769 | { | ||
1770 | int i; | ||
1771 | |||
1772 | spin_lock(&mem->pcp_counter_lock); | ||
1773 | for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { | ||
1774 | s64 x = per_cpu(mem->stat->count[i], cpu); | ||
1775 | |||
1776 | per_cpu(mem->stat->count[i], cpu) = 0; | ||
1777 | mem->nocpu_base.count[i] += x; | ||
1778 | } | ||
1779 | /* need to clear ON_MOVE value, works as a kind of lock. */ | ||
1780 | per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0; | ||
1781 | spin_unlock(&mem->pcp_counter_lock); | ||
1782 | } | ||
1783 | |||
1784 | static void synchronize_mem_cgroup_on_move(struct mem_cgroup *mem, int cpu) | ||
1785 | { | ||
1786 | int idx = MEM_CGROUP_ON_MOVE; | ||
1787 | |||
1788 | spin_lock(&mem->pcp_counter_lock); | ||
1789 | per_cpu(mem->stat->count[idx], cpu) = mem->nocpu_base.count[idx]; | ||
1790 | spin_unlock(&mem->pcp_counter_lock); | ||
1791 | } | ||
1792 | |||
1793 | static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb, | ||
1609 | unsigned long action, | 1794 | unsigned long action, |
1610 | void *hcpu) | 1795 | void *hcpu) |
1611 | { | 1796 | { |
1612 | int cpu = (unsigned long)hcpu; | 1797 | int cpu = (unsigned long)hcpu; |
1613 | struct memcg_stock_pcp *stock; | 1798 | struct memcg_stock_pcp *stock; |
1799 | struct mem_cgroup *iter; | ||
1614 | 1800 | ||
1615 | if (action != CPU_DEAD) | 1801 | if ((action == CPU_ONLINE)) { |
1802 | for_each_mem_cgroup_all(iter) | ||
1803 | synchronize_mem_cgroup_on_move(iter, cpu); | ||
1616 | return NOTIFY_OK; | 1804 | return NOTIFY_OK; |
1805 | } | ||
1806 | |||
1807 | if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) | ||
1808 | return NOTIFY_OK; | ||
1809 | |||
1810 | for_each_mem_cgroup_all(iter) | ||
1811 | mem_cgroup_drain_pcp_counter(iter, cpu); | ||
1812 | |||
1617 | stock = &per_cpu(memcg_stock, cpu); | 1813 | stock = &per_cpu(memcg_stock, cpu); |
1618 | drain_stock(stock); | 1814 | drain_stock(stock); |
1619 | return NOTIFY_OK; | 1815 | return NOTIFY_OK; |
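The hotplug callback now has two jobs: on CPU_ONLINE it copies the shared ON_MOVE value into the new CPU's slot, and on CPU_DEAD or CPU_DEAD_FROZEN it drains the dead CPU's counters into nocpu_base before draining the charge stock. A toy dispatcher showing just that control flow (strings in place of the real work):

```c
#include <stdio.h>

enum { CPU_ONLINE, CPU_DEAD, CPU_DEAD_FROZEN, CPU_OTHER };

/* Toy model of the notifier's decision logic only. */
static const char *hotplug_action(int action)
{
    if (action == CPU_ONLINE)
        return "sync ON_MOVE into the new cpu's slot";

    /* both "dead" variants fall through to the drain */
    if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
        return "ignore";

    return "drain per-cpu counters into nocpu_base, then drain the stock";
}

int main(void)
{
    printf("ONLINE      -> %s\n", hotplug_action(CPU_ONLINE));
    printf("DEAD        -> %s\n", hotplug_action(CPU_DEAD));
    printf("DEAD_FROZEN -> %s\n", hotplug_action(CPU_DEAD_FROZEN));
    printf("OTHER       -> %s\n", hotplug_action(CPU_OTHER));
    return 0;
}
```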
@@ -1964,7 +2160,7 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, | |||
1964 | { | 2160 | { |
1965 | VM_BUG_ON(from == to); | 2161 | VM_BUG_ON(from == to); |
1966 | VM_BUG_ON(PageLRU(pc->page)); | 2162 | VM_BUG_ON(PageLRU(pc->page)); |
1967 | VM_BUG_ON(!PageCgroupLocked(pc)); | 2163 | VM_BUG_ON(!page_is_cgroup_locked(pc)); |
1968 | VM_BUG_ON(!PageCgroupUsed(pc)); | 2164 | VM_BUG_ON(!PageCgroupUsed(pc)); |
1969 | VM_BUG_ON(pc->mem_cgroup != from); | 2165 | VM_BUG_ON(pc->mem_cgroup != from); |
1970 | 2166 | ||
@@ -3038,6 +3234,7 @@ move_account: | |||
3038 | lru_add_drain_all(); | 3234 | lru_add_drain_all(); |
3039 | drain_all_stock_sync(); | 3235 | drain_all_stock_sync(); |
3040 | ret = 0; | 3236 | ret = 0; |
3237 | mem_cgroup_start_move(mem); | ||
3041 | for_each_node_state(node, N_HIGH_MEMORY) { | 3238 | for_each_node_state(node, N_HIGH_MEMORY) { |
3042 | for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { | 3239 | for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) { |
3043 | enum lru_list l; | 3240 | enum lru_list l; |
@@ -3051,6 +3248,7 @@ move_account: | |||
3051 | if (ret) | 3248 | if (ret) |
3052 | break; | 3249 | break; |
3053 | } | 3250 | } |
3251 | mem_cgroup_end_move(mem); | ||
3054 | memcg_oom_recover(mem); | 3252 | memcg_oom_recover(mem); |
3055 | /* it seems parent cgroup doesn't have enough mem */ | 3253 | /* it seems parent cgroup doesn't have enough mem */ |
3056 | if (ret == -ENOMEM) | 3254 | if (ret == -ENOMEM) |
@@ -3137,33 +3335,25 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, | |||
3137 | return retval; | 3335 | return retval; |
3138 | } | 3336 | } |
3139 | 3337 | ||
3140 | struct mem_cgroup_idx_data { | ||
3141 | s64 val; | ||
3142 | enum mem_cgroup_stat_index idx; | ||
3143 | }; | ||
3144 | 3338 | ||
3145 | static int | 3339 | static u64 mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, |
3146 | mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data) | 3340 | enum mem_cgroup_stat_index idx) |
3147 | { | 3341 | { |
3148 | struct mem_cgroup_idx_data *d = data; | 3342 | struct mem_cgroup *iter; |
3149 | d->val += mem_cgroup_read_stat(mem, d->idx); | 3343 | s64 val = 0; |
3150 | return 0; | ||
3151 | } | ||
3152 | 3344 | ||
3153 | static void | 3345 | /* each per-cpu value can be negative, so use s64 */ |
3154 | mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, | 3346 | for_each_mem_cgroup_tree(iter, mem) |
3155 | enum mem_cgroup_stat_index idx, s64 *val) | 3347 | val += mem_cgroup_read_stat(iter, idx); |
3156 | { | 3348 | |
3157 | struct mem_cgroup_idx_data d; | 3349 | if (val < 0) /* race ? */ |
3158 | d.idx = idx; | 3350 | val = 0; |
3159 | d.val = 0; | 3351 | return val; |
3160 | mem_cgroup_walk_tree(mem, &d, mem_cgroup_get_idx_stat); | ||
3161 | *val = d.val; | ||
3162 | } | 3352 | } |
3163 | 3353 | ||
3164 | static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) | 3354 | static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) |
3165 | { | 3355 | { |
3166 | u64 idx_val, val; | 3356 | u64 val; |
3167 | 3357 | ||
3168 | if (!mem_cgroup_is_root(mem)) { | 3358 | if (!mem_cgroup_is_root(mem)) { |
3169 | if (!swap) | 3359 | if (!swap) |
@@ -3172,16 +3362,12 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) | |||
3172 | return res_counter_read_u64(&mem->memsw, RES_USAGE); | 3362 | return res_counter_read_u64(&mem->memsw, RES_USAGE); |
3173 | } | 3363 | } |
3174 | 3364 | ||
3175 | mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE, &idx_val); | 3365 | val = mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE); |
3176 | val = idx_val; | 3366 | val += mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS); |
3177 | mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS, &idx_val); | ||
3178 | val += idx_val; | ||
3179 | 3367 | ||
3180 | if (swap) { | 3368 | if (swap) |
3181 | mem_cgroup_get_recursive_idx_stat(mem, | 3369 | val += mem_cgroup_get_recursive_idx_stat(mem, |
3182 | MEM_CGROUP_STAT_SWAPOUT, &idx_val); | 3370 | MEM_CGROUP_STAT_SWAPOUT); |
3183 | val += idx_val; | ||
3184 | } | ||
3185 | 3371 | ||
3186 | return val << PAGE_SHIFT; | 3372 | return val << PAGE_SHIFT; |
3187 | } | 3373 | } |
@@ -3389,9 +3575,9 @@ struct { | |||
3389 | }; | 3575 | }; |
3390 | 3576 | ||
3391 | 3577 | ||
3392 | static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) | 3578 | static void |
3579 | mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) | ||
3393 | { | 3580 | { |
3394 | struct mcs_total_stat *s = data; | ||
3395 | s64 val; | 3581 | s64 val; |
3396 | 3582 | ||
3397 | /* per cpu stat */ | 3583 | /* per cpu stat */ |
@@ -3421,13 +3607,15 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) | |||
3421 | s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; | 3607 | s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE; |
3422 | val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE); | 3608 | val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE); |
3423 | s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; | 3609 | s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE; |
3424 | return 0; | ||
3425 | } | 3610 | } |
3426 | 3611 | ||
3427 | static void | 3612 | static void |
3428 | mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) | 3613 | mem_cgroup_get_total_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) |
3429 | { | 3614 | { |
3430 | mem_cgroup_walk_tree(mem, s, mem_cgroup_get_local_stat); | 3615 | struct mem_cgroup *iter; |
3616 | |||
3617 | for_each_mem_cgroup_tree(iter, mem) | ||
3618 | mem_cgroup_get_local_stat(iter, s); | ||
3431 | } | 3619 | } |
3432 | 3620 | ||
3433 | static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, | 3621 | static int mem_control_stat_show(struct cgroup *cont, struct cftype *cft, |
@@ -3604,7 +3792,7 @@ static int compare_thresholds(const void *a, const void *b) | |||
3604 | return _a->threshold - _b->threshold; | 3792 | return _a->threshold - _b->threshold; |
3605 | } | 3793 | } |
3606 | 3794 | ||
3607 | static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data) | 3795 | static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem) |
3608 | { | 3796 | { |
3609 | struct mem_cgroup_eventfd_list *ev; | 3797 | struct mem_cgroup_eventfd_list *ev; |
3610 | 3798 | ||
@@ -3615,7 +3803,10 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *mem, void *data) | |||
3615 | 3803 | ||
3616 | static void mem_cgroup_oom_notify(struct mem_cgroup *mem) | 3804 | static void mem_cgroup_oom_notify(struct mem_cgroup *mem) |
3617 | { | 3805 | { |
3618 | mem_cgroup_walk_tree(mem, NULL, mem_cgroup_oom_notify_cb); | 3806 | struct mem_cgroup *iter; |
3807 | |||
3808 | for_each_mem_cgroup_tree(iter, mem) | ||
3809 | mem_cgroup_oom_notify_cb(iter); | ||
3619 | } | 3810 | } |
3620 | 3811 | ||
3621 | static int mem_cgroup_usage_register_event(struct cgroup *cgrp, | 3812 | static int mem_cgroup_usage_register_event(struct cgroup *cgrp, |
@@ -4025,14 +4216,17 @@ static struct mem_cgroup *mem_cgroup_alloc(void) | |||
4025 | 4216 | ||
4026 | memset(mem, 0, size); | 4217 | memset(mem, 0, size); |
4027 | mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu); | 4218 | mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu); |
4028 | if (!mem->stat) { | 4219 | if (!mem->stat) |
4029 | if (size < PAGE_SIZE) | 4220 | goto out_free; |
4030 | kfree(mem); | 4221 | spin_lock_init(&mem->pcp_counter_lock); |
4031 | else | ||
4032 | vfree(mem); | ||
4033 | mem = NULL; | ||
4034 | } | ||
4035 | return mem; | 4222 | return mem; |
4223 | |||
4224 | out_free: | ||
4225 | if (size < PAGE_SIZE) | ||
4226 | kfree(mem); | ||
4227 | else | ||
4228 | vfree(mem); | ||
4229 | return NULL; | ||
4036 | } | 4230 | } |
4037 | 4231 | ||
4038 | /* | 4232 | /* |
@@ -4158,7 +4352,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
4158 | &per_cpu(memcg_stock, cpu); | 4352 | &per_cpu(memcg_stock, cpu); |
4159 | INIT_WORK(&stock->work, drain_local_stock); | 4353 | INIT_WORK(&stock->work, drain_local_stock); |
4160 | } | 4354 | } |
4161 | hotcpu_notifier(memcg_stock_cpu_callback, 0); | 4355 | hotcpu_notifier(memcg_cpu_hotplug_callback, 0); |
4162 | } else { | 4356 | } else { |
4163 | parent = mem_cgroup_from_cont(cont->parent); | 4357 | parent = mem_cgroup_from_cont(cont->parent); |
4164 | mem->use_hierarchy = parent->use_hierarchy; | 4358 | mem->use_hierarchy = parent->use_hierarchy; |
@@ -4445,7 +4639,7 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) | |||
4445 | unsigned long precharge; | 4639 | unsigned long precharge; |
4446 | struct vm_area_struct *vma; | 4640 | struct vm_area_struct *vma; |
4447 | 4641 | ||
4448 | down_read(&mm->mmap_sem); | 4642 | /* We've already held the mmap_sem */ |
4449 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 4643 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
4450 | struct mm_walk mem_cgroup_count_precharge_walk = { | 4644 | struct mm_walk mem_cgroup_count_precharge_walk = { |
4451 | .pmd_entry = mem_cgroup_count_precharge_pte_range, | 4645 | .pmd_entry = mem_cgroup_count_precharge_pte_range, |
@@ -4457,7 +4651,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) | |||
4457 | walk_page_range(vma->vm_start, vma->vm_end, | 4651 | walk_page_range(vma->vm_start, vma->vm_end, |
4458 | &mem_cgroup_count_precharge_walk); | 4652 | &mem_cgroup_count_precharge_walk); |
4459 | } | 4653 | } |
4460 | up_read(&mm->mmap_sem); | ||
4461 | 4654 | ||
4462 | precharge = mc.precharge; | 4655 | precharge = mc.precharge; |
4463 | mc.precharge = 0; | 4656 | mc.precharge = 0; |
@@ -4508,11 +4701,17 @@ static void mem_cgroup_clear_mc(void) | |||
4508 | 4701 | ||
4509 | mc.moved_swap = 0; | 4702 | mc.moved_swap = 0; |
4510 | } | 4703 | } |
4704 | if (mc.mm) { | ||
4705 | up_read(&mc.mm->mmap_sem); | ||
4706 | mmput(mc.mm); | ||
4707 | } | ||
4511 | spin_lock(&mc.lock); | 4708 | spin_lock(&mc.lock); |
4512 | mc.from = NULL; | 4709 | mc.from = NULL; |
4513 | mc.to = NULL; | 4710 | mc.to = NULL; |
4514 | mc.moving_task = NULL; | ||
4515 | spin_unlock(&mc.lock); | 4711 | spin_unlock(&mc.lock); |
4712 | mc.moving_task = NULL; | ||
4713 | mc.mm = NULL; | ||
4714 | mem_cgroup_end_move(from); | ||
4516 | memcg_oom_recover(from); | 4715 | memcg_oom_recover(from); |
4517 | memcg_oom_recover(to); | 4716 | memcg_oom_recover(to); |
4518 | wake_up_all(&mc.waitq); | 4717 | wake_up_all(&mc.waitq); |
@@ -4537,26 +4736,38 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | |||
4537 | return 0; | 4736 | return 0; |
4538 | /* We move charges only when we move a owner of the mm */ | 4737 | /* We move charges only when we move a owner of the mm */ |
4539 | if (mm->owner == p) { | 4738 | if (mm->owner == p) { |
4739 | /* | ||
4740 | * We do all the move charge works under one mmap_sem to | ||
4741 | * avoid deadlock with down_write(&mmap_sem) | ||
4742 | * -> try_charge() -> if (mc.moving_task) -> sleep. | ||
4743 | */ | ||
4744 | down_read(&mm->mmap_sem); | ||
4745 | |||
4540 | VM_BUG_ON(mc.from); | 4746 | VM_BUG_ON(mc.from); |
4541 | VM_BUG_ON(mc.to); | 4747 | VM_BUG_ON(mc.to); |
4542 | VM_BUG_ON(mc.precharge); | 4748 | VM_BUG_ON(mc.precharge); |
4543 | VM_BUG_ON(mc.moved_charge); | 4749 | VM_BUG_ON(mc.moved_charge); |
4544 | VM_BUG_ON(mc.moved_swap); | 4750 | VM_BUG_ON(mc.moved_swap); |
4545 | VM_BUG_ON(mc.moving_task); | 4751 | VM_BUG_ON(mc.moving_task); |
4752 | VM_BUG_ON(mc.mm); | ||
4753 | |||
4754 | mem_cgroup_start_move(from); | ||
4546 | spin_lock(&mc.lock); | 4755 | spin_lock(&mc.lock); |
4547 | mc.from = from; | 4756 | mc.from = from; |
4548 | mc.to = mem; | 4757 | mc.to = mem; |
4549 | mc.precharge = 0; | 4758 | mc.precharge = 0; |
4550 | mc.moved_charge = 0; | 4759 | mc.moved_charge = 0; |
4551 | mc.moved_swap = 0; | 4760 | mc.moved_swap = 0; |
4552 | mc.moving_task = current; | ||
4553 | spin_unlock(&mc.lock); | 4761 | spin_unlock(&mc.lock); |
4762 | mc.moving_task = current; | ||
4763 | mc.mm = mm; | ||
4554 | 4764 | ||
4555 | ret = mem_cgroup_precharge_mc(mm); | 4765 | ret = mem_cgroup_precharge_mc(mm); |
4556 | if (ret) | 4766 | if (ret) |
4557 | mem_cgroup_clear_mc(); | 4767 | mem_cgroup_clear_mc(); |
4558 | } | 4768 | /* We call up_read() and mmput() in clear_mc(). */ |
4559 | mmput(mm); | 4769 | } else |
4770 | mmput(mm); | ||
4560 | } | 4771 | } |
4561 | return ret; | 4772 | return ret; |
4562 | } | 4773 | } |
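With mc.mm recorded, mmap_sem is taken for reading once in mem_cgroup_can_attach() and released only in mem_cgroup_clear_mc(), so the precharge and move walks run under a single hold and cannot deadlock against a writer that later blocks in try_charge(). A schematic of that lock lifetime, with a user-space rwlock standing in for mmap_sem:

```c
#include <pthread.h>
#include <stdio.h>

/* Hypothetical stand-ins for the mm and the global "mc" state. */
static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;
static int mc_mm_held;

static void can_attach(void)        /* cf. mem_cgroup_can_attach() */
{
    pthread_rwlock_rdlock(&mmap_sem);      /* taken once, up front */
    mc_mm_held = 1;
    /* ... precharge walk runs with the lock already held ... */
}

static void move_task(void)         /* cf. mem_cgroup_move_task() */
{
    /* ... charge-moving walk, still under the same read-side hold ... */
}

static void clear_mc(void)          /* cf. mem_cgroup_clear_mc() */
{
    if (mc_mm_held) {
        pthread_rwlock_unlock(&mmap_sem);  /* released exactly once */
        mc_mm_held = 0;
    }
}

int main(void)
{
    can_attach();
    move_task();
    clear_mc();
    puts("move sequence completed under a single read-side hold");
    return 0;
}
```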
@@ -4644,7 +4855,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) | |||
4644 | struct vm_area_struct *vma; | 4855 | struct vm_area_struct *vma; |
4645 | 4856 | ||
4646 | lru_add_drain_all(); | 4857 | lru_add_drain_all(); |
4647 | down_read(&mm->mmap_sem); | 4858 | /* We've already held the mmap_sem */ |
4648 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 4859 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
4649 | int ret; | 4860 | int ret; |
4650 | struct mm_walk mem_cgroup_move_charge_walk = { | 4861 | struct mm_walk mem_cgroup_move_charge_walk = { |
@@ -4663,7 +4874,6 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) | |||
4663 | */ | 4874 | */ |
4664 | break; | 4875 | break; |
4665 | } | 4876 | } |
4666 | up_read(&mm->mmap_sem); | ||
4667 | } | 4877 | } |
4668 | 4878 | ||
4669 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, | 4879 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, |
@@ -4672,17 +4882,11 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, | |||
4672 | struct task_struct *p, | 4882 | struct task_struct *p, |
4673 | bool threadgroup) | 4883 | bool threadgroup) |
4674 | { | 4884 | { |
4675 | struct mm_struct *mm; | 4885 | if (!mc.mm) |
4676 | |||
4677 | if (!mc.to) | ||
4678 | /* no need to move charge */ | 4886 | /* no need to move charge */ |
4679 | return; | 4887 | return; |
4680 | 4888 | ||
4681 | mm = get_task_mm(p); | 4889 | mem_cgroup_move_charge(mc.mm); |
4682 | if (mm) { | ||
4683 | mem_cgroup_move_charge(mm); | ||
4684 | mmput(mm); | ||
4685 | } | ||
4686 | mem_cgroup_clear_mc(); | 4890 | mem_cgroup_clear_mc(); |
4687 | } | 4891 | } |
4688 | #else /* !CONFIG_MMU */ | 4892 | #else /* !CONFIG_MMU */ |
@@ -4723,10 +4927,20 @@ struct cgroup_subsys mem_cgroup_subsys = { | |||
4723 | }; | 4927 | }; |
4724 | 4928 | ||
4725 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 4929 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
4930 | static int __init enable_swap_account(char *s) | ||
4931 | { | ||
4932 | /* consider enabled if no parameter or 1 is given */ | ||
4933 | if (!s || !strcmp(s, "1")) | ||
4934 | really_do_swap_account = 1; | ||
4935 | else if (!strcmp(s, "0")) | ||
4936 | really_do_swap_account = 0; | ||
4937 | return 1; | ||
4938 | } | ||
4939 | __setup("swapaccount", enable_swap_account); | ||
4726 | 4940 | ||
4727 | static int __init disable_swap_account(char *s) | 4941 | static int __init disable_swap_account(char *s) |
4728 | { | 4942 | { |
4729 | really_do_swap_account = 0; | 4943 | enable_swap_account("0"); |
4730 | return 1; | 4944 | return 1; |
4731 | } | 4945 | } |
4732 | __setup("noswapaccount", disable_swap_account); | 4946 | __setup("noswapaccount", disable_swap_account); |
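For reference, the effect of the new parser: booting with swapaccount or swapaccount=1 enables swap accounting, swapaccount=0 disables it, and the old noswapaccount keyword now routes through the same function. A tiny stand-alone model of that decision table (names mirror the hunk above, but this is plain user-space C, not the __setup machinery):

```c
#include <stdio.h>
#include <string.h>

static int really_do_swap_account = 1;  /* compile-time default */

/* Same decision table as enable_swap_account() in the hunk above. */
static void swapaccount(const char *arg)
{
    if (!arg || !strcmp(arg, "1"))
        really_do_swap_account = 1;     /* "swapaccount" or "swapaccount=1" */
    else if (!strcmp(arg, "0"))
        really_do_swap_account = 0;     /* "swapaccount=0" */
}

static void noswapaccount(void)
{
    swapaccount("0");                   /* legacy spelling, same effect */
}

int main(void)
{
    swapaccount("0");
    printf("swapaccount=0  -> %d\n", really_do_swap_account);
    swapaccount(NULL);
    printf("swapaccount    -> %d\n", really_do_swap_account);
    noswapaccount();
    printf("noswapaccount  -> %d\n", really_do_swap_account);
    return 0;
}
```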