about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- kernel/cpuset.c 48
1 files changed, 32 insertions, 16 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index fe5407ca2f1e..8da627d33804 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -127,6 +127,7 @@ struct cpuset_hotplug_scanner {
127typedef enum { 127typedef enum {
128 CS_CPU_EXCLUSIVE, 128 CS_CPU_EXCLUSIVE,
129 CS_MEM_EXCLUSIVE, 129 CS_MEM_EXCLUSIVE,
130 CS_MEM_HARDWALL,
130 CS_MEMORY_MIGRATE, 131 CS_MEMORY_MIGRATE,
131 CS_SCHED_LOAD_BALANCE, 132 CS_SCHED_LOAD_BALANCE,
132 CS_SPREAD_PAGE, 133 CS_SPREAD_PAGE,
@@ -144,6 +145,11 @@ static inline int is_mem_exclusive(const struct cpuset *cs)
144 return test_bit(CS_MEM_EXCLUSIVE, &cs->flags); 145 return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
145} 146}
146 147
148static inline int is_mem_hardwall(const struct cpuset *cs)
149{
150 return test_bit(CS_MEM_HARDWALL, &cs->flags);
151}
152
147static inline int is_sched_load_balance(const struct cpuset *cs) 153static inline int is_sched_load_balance(const struct cpuset *cs)
148{ 154{
149 return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); 155 return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
@@ -1042,12 +1048,9 @@ static int update_relax_domain_level(struct cpuset *cs, char *buf)
1042 1048
1043/* 1049/*
1044 * update_flag - read a 0 or a 1 in a file and update associated flag 1050 * update_flag - read a 0 or a 1 in a file and update associated flag
1045 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, 1051 * bit: the bit to update (see cpuset_flagbits_t)
1046 * CS_SCHED_LOAD_BALANCE, 1052 * cs: the cpuset to update
1047 * CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE, 1053 * turning_on: whether the flag is being set or cleared
1048 * CS_SPREAD_PAGE, CS_SPREAD_SLAB)
1049 * cs: the cpuset to update
1050 * buf: the buffer where we read the 0 or 1
1051 * 1054 *
1052 * Call with cgroup_mutex held. 1055 * Call with cgroup_mutex held.
1053 */ 1056 */
@@ -1228,6 +1231,7 @@ typedef enum {
1228 FILE_MEMLIST, 1231 FILE_MEMLIST,
1229 FILE_CPU_EXCLUSIVE, 1232 FILE_CPU_EXCLUSIVE,
1230 FILE_MEM_EXCLUSIVE, 1233 FILE_MEM_EXCLUSIVE,
1234 FILE_MEM_HARDWALL,
1231 FILE_SCHED_LOAD_BALANCE, 1235 FILE_SCHED_LOAD_BALANCE,
1232 FILE_SCHED_RELAX_DOMAIN_LEVEL, 1236 FILE_SCHED_RELAX_DOMAIN_LEVEL,
1233 FILE_MEMORY_PRESSURE_ENABLED, 1237 FILE_MEMORY_PRESSURE_ENABLED,
@@ -1313,6 +1317,9 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
1313 case FILE_MEM_EXCLUSIVE: 1317 case FILE_MEM_EXCLUSIVE:
1314 retval = update_flag(CS_MEM_EXCLUSIVE, cs, val); 1318 retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
1315 break; 1319 break;
1320 case FILE_MEM_HARDWALL:
1321 retval = update_flag(CS_MEM_HARDWALL, cs, val);
1322 break;
1316 case FILE_SCHED_LOAD_BALANCE: 1323 case FILE_SCHED_LOAD_BALANCE:
1317 retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val); 1324 retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
1318 break; 1325 break;
@@ -1423,6 +1430,8 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
1423 return is_cpu_exclusive(cs); 1430 return is_cpu_exclusive(cs);
1424 case FILE_MEM_EXCLUSIVE: 1431 case FILE_MEM_EXCLUSIVE:
1425 return is_mem_exclusive(cs); 1432 return is_mem_exclusive(cs);
1433 case FILE_MEM_HARDWALL:
1434 return is_mem_hardwall(cs);
1426 case FILE_SCHED_LOAD_BALANCE: 1435 case FILE_SCHED_LOAD_BALANCE:
1427 return is_sched_load_balance(cs); 1436 return is_sched_load_balance(cs);
1428 case FILE_MEMORY_MIGRATE: 1437 case FILE_MEMORY_MIGRATE:
@@ -1475,6 +1484,13 @@ static struct cftype files[] = {
1475 }, 1484 },
1476 1485
1477 { 1486 {
1487 .name = "mem_hardwall",
1488 .read_u64 = cpuset_read_u64,
1489 .write_u64 = cpuset_write_u64,
1490 .private = FILE_MEM_HARDWALL,
1491 },
1492
1493 {
1478 .name = "sched_load_balance", 1494 .name = "sched_load_balance",
1479 .read_u64 = cpuset_read_u64, 1495 .read_u64 = cpuset_read_u64,
1480 .write_u64 = cpuset_write_u64, 1496 .write_u64 = cpuset_write_u64,
@@ -1963,14 +1979,14 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
1963} 1979}
1964 1980
1965/* 1981/*
1966 * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive 1982 * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
1967 * ancestor to the specified cpuset. Call holding callback_mutex. 1983 * mem_hardwall ancestor to the specified cpuset. Call holding
1968 * If no ancestor is mem_exclusive (an unusual configuration), then 1984 * callback_mutex. If no ancestor is mem_exclusive or mem_hardwall
1969 * returns the root cpuset. 1985 * (an unusual configuration), then returns the root cpuset.
1970 */ 1986 */
1971static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) 1987static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs)
1972{ 1988{
1973 while (!is_mem_exclusive(cs) && cs->parent) 1989 while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent)
1974 cs = cs->parent; 1990 cs = cs->parent;
1975 return cs; 1991 return cs;
1976} 1992}
@@ -1984,7 +2000,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
1984 * __GFP_THISNODE is set, yes, we can always allocate. If zone 2000 * __GFP_THISNODE is set, yes, we can always allocate. If zone
1985 * z's node is in our tasks mems_allowed, yes. If it's not a 2001 * z's node is in our tasks mems_allowed, yes. If it's not a
1986 * __GFP_HARDWALL request and this zone's nodes is in the nearest 2002 * __GFP_HARDWALL request and this zone's nodes is in the nearest
1987 * mem_exclusive cpuset ancestor to this tasks cpuset, yes. 2003 * hardwalled cpuset ancestor to this tasks cpuset, yes.
1988 * If the task has been OOM killed and has access to memory reserves 2004 * If the task has been OOM killed and has access to memory reserves
1989 * as specified by the TIF_MEMDIE flag, yes. 2005 * as specified by the TIF_MEMDIE flag, yes.
1990 * Otherwise, no. 2006 * Otherwise, no.
@@ -2007,7 +2023,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
2007 * and do not allow allocations outside the current tasks cpuset 2023 * and do not allow allocations outside the current tasks cpuset
2008 * unless the task has been OOM killed as is marked TIF_MEMDIE. 2024 * unless the task has been OOM killed as is marked TIF_MEMDIE.
2009 * GFP_KERNEL allocations are not so marked, so can escape to the 2025 * GFP_KERNEL allocations are not so marked, so can escape to the
2010 * nearest enclosing mem_exclusive ancestor cpuset. 2026 * nearest enclosing hardwalled ancestor cpuset.
2011 * 2027 *
2012 * Scanning up parent cpusets requires callback_mutex. The 2028 * Scanning up parent cpusets requires callback_mutex. The
2013 * __alloc_pages() routine only calls here with __GFP_HARDWALL bit 2029 * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
@@ -2030,7 +2046,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
2030 * in_interrupt - any node ok (current task context irrelevant) 2046 * in_interrupt - any node ok (current task context irrelevant)
2031 * GFP_ATOMIC - any node ok 2047 * GFP_ATOMIC - any node ok
2032 * TIF_MEMDIE - any node ok 2048 * TIF_MEMDIE - any node ok
2033 * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok 2049 * GFP_KERNEL - any node in enclosing hardwalled cpuset ok
2034 * GFP_USER - only nodes in current tasks mems allowed ok. 2050 * GFP_USER - only nodes in current tasks mems allowed ok.
2035 * 2051 *
2036 * Rule: 2052 * Rule:
@@ -2067,7 +2083,7 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
2067 mutex_lock(&callback_mutex); 2083 mutex_lock(&callback_mutex);
2068 2084
2069 task_lock(current); 2085 task_lock(current);
2070 cs = nearest_exclusive_ancestor(task_cs(current)); 2086 cs = nearest_hardwall_ancestor(task_cs(current));
2071 task_unlock(current); 2087 task_unlock(current);
2072 2088
2073 allowed = node_isset(node, cs->mems_allowed); 2089 allowed = node_isset(node, cs->mems_allowed);