path: root/kernel/cpuset.c
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--  kernel/cpuset.c  355
1 files changed, 182 insertions(+), 173 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 48a976c52cf5..8da627d33804 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -127,6 +127,7 @@ struct cpuset_hotplug_scanner {
 typedef enum {
 	CS_CPU_EXCLUSIVE,
 	CS_MEM_EXCLUSIVE,
+	CS_MEM_HARDWALL,
 	CS_MEMORY_MIGRATE,
 	CS_SCHED_LOAD_BALANCE,
 	CS_SPREAD_PAGE,
@@ -144,6 +145,11 @@ static inline int is_mem_exclusive(const struct cpuset *cs)
 	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
 }
 
+static inline int is_mem_hardwall(const struct cpuset *cs)
+{
+	return test_bit(CS_MEM_HARDWALL, &cs->flags);
+}
+
 static inline int is_sched_load_balance(const struct cpuset *cs)
 {
 	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
@@ -735,7 +741,8 @@ static inline int started_after(void *p1, void *p2)
  * Return nonzero if this tasks's cpus_allowed mask should be changed (in other
  * words, if its mask is not equal to its cpuset's mask).
  */
-int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
+static int cpuset_test_cpumask(struct task_struct *tsk,
+			       struct cgroup_scanner *scan)
 {
 	return !cpus_equal(tsk->cpus_allowed,
 			(cgroup_cs(scan->cg))->cpus_allowed);
@@ -752,7 +759,8 @@ int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
  * We don't need to re-check for the cgroup/cpuset membership, since we're
  * holding cgroup_lock() at this point.
  */
-void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
+static void cpuset_change_cpumask(struct task_struct *tsk,
+				  struct cgroup_scanner *scan)
 {
 	set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
 }
@@ -1023,19 +1031,6 @@ int current_cpuset_is_being_rebound(void)
 	return task_cs(current) == cpuset_being_rebound;
 }
 
-/*
- * Call with cgroup_mutex held.
- */
-
-static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
-{
-	if (simple_strtoul(buf, NULL, 10) != 0)
-		cpuset_memory_pressure_enabled = 1;
-	else
-		cpuset_memory_pressure_enabled = 0;
-	return 0;
-}
-
 static int update_relax_domain_level(struct cpuset *cs, char *buf)
 {
 	int val = simple_strtol(buf, NULL, 10);
@@ -1053,25 +1048,20 @@ static int update_relax_domain_level(struct cpuset *cs, char *buf)
 
 /*
  * update_flag - read a 0 or a 1 in a file and update associated flag
- * bit:	the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
- *				CS_SCHED_LOAD_BALANCE,
- *				CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
- *				CS_SPREAD_PAGE, CS_SPREAD_SLAB)
- * cs:	the cpuset to update
- * buf:	the buffer where we read the 0 or 1
+ * bit:		the bit to update (see cpuset_flagbits_t)
+ * cs:		the cpuset to update
+ * turning_on:	whether the flag is being set or cleared
  *
  * Call with cgroup_mutex held.
  */
 
-static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
+static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
+		       int turning_on)
 {
-	int turning_on;
 	struct cpuset trialcs;
 	int err;
 	int cpus_nonempty, balance_flag_changed;
 
-	turning_on = (simple_strtoul(buf, NULL, 10) != 0);
-
 	trialcs = *cs;
 	if (turning_on)
 		set_bit(bit, &trialcs.flags);
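
The practical effect of this signature change shows up at call sites: update_flag() no longer parses the user's buffer itself, so in-kernel callers pass a plain integer instead of a string. A minimal before/after sketch (both forms appear verbatim in the cpuset_destroy() hunk further down):

	/* before: update_flag() ran simple_strtoul() on the file buffer */
	update_flag(CS_SCHED_LOAD_BALANCE, cs, "0");

	/* after: the cgroup core parses the write; callers pass an int */
	update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);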
@@ -1241,6 +1231,7 @@ typedef enum {
 	FILE_MEMLIST,
 	FILE_CPU_EXCLUSIVE,
 	FILE_MEM_EXCLUSIVE,
+	FILE_MEM_HARDWALL,
 	FILE_SCHED_LOAD_BALANCE,
 	FILE_SCHED_RELAX_DOMAIN_LEVEL,
 	FILE_MEMORY_PRESSURE_ENABLED,
@@ -1289,46 +1280,71 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
 	case FILE_MEMLIST:
 		retval = update_nodemask(cs, buffer);
 		break;
+	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+		retval = update_relax_domain_level(cs, buffer);
+		break;
+	default:
+		retval = -EINVAL;
+		goto out2;
+	}
+
+	if (retval == 0)
+		retval = nbytes;
+out2:
+	cgroup_unlock();
+out1:
+	kfree(buffer);
+	return retval;
+}
+
+static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
+{
+	int retval = 0;
+	struct cpuset *cs = cgroup_cs(cgrp);
+	cpuset_filetype_t type = cft->private;
+
+	cgroup_lock();
+
+	if (cgroup_is_removed(cgrp)) {
+		cgroup_unlock();
+		return -ENODEV;
+	}
+
+	switch (type) {
 	case FILE_CPU_EXCLUSIVE:
-		retval = update_flag(CS_CPU_EXCLUSIVE, cs, buffer);
+		retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
 		break;
 	case FILE_MEM_EXCLUSIVE:
-		retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
+		retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
 		break;
-	case FILE_SCHED_LOAD_BALANCE:
-		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer);
+	case FILE_MEM_HARDWALL:
+		retval = update_flag(CS_MEM_HARDWALL, cs, val);
 		break;
-	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
-		retval = update_relax_domain_level(cs, buffer);
+	case FILE_SCHED_LOAD_BALANCE:
+		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
 		break;
 	case FILE_MEMORY_MIGRATE:
-		retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
+		retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
 		break;
 	case FILE_MEMORY_PRESSURE_ENABLED:
-		retval = update_memory_pressure_enabled(cs, buffer);
+		cpuset_memory_pressure_enabled = !!val;
 		break;
 	case FILE_MEMORY_PRESSURE:
 		retval = -EACCES;
 		break;
 	case FILE_SPREAD_PAGE:
-		retval = update_flag(CS_SPREAD_PAGE, cs, buffer);
+		retval = update_flag(CS_SPREAD_PAGE, cs, val);
 		cs->mems_generation = cpuset_mems_generation++;
 		break;
 	case FILE_SPREAD_SLAB:
-		retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
+		retval = update_flag(CS_SPREAD_SLAB, cs, val);
 		cs->mems_generation = cpuset_mems_generation++;
 		break;
 	default:
 		retval = -EINVAL;
-		goto out2;
+		break;
 	}
-
-	if (retval == 0)
-		retval = nbytes;
-out2:
 	cgroup_unlock();
-out1:
-	kfree(buffer);
 	return retval;
 }
 
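For context, a hedged userspace sketch of the resulting interface (not part of the patch; the /dev/cpuset mount point is an assumption taken from the cpuset documentation of this era, and "foo" is a hypothetical cpuset): writing a decimal value to one of the boolean files is parsed by the cgroup core into the u64 that cpuset_write_u64() receives.

	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		/* Hypothetical path: enable mem_hardwall on cpuset "foo". */
		int fd = open("/dev/cpuset/foo/mem_hardwall", O_WRONLY);
		if (fd < 0)
			return 1;
		/* The cgroup core parses "1" and calls cpuset_write_u64()
		 * with val == 1. */
		write(fd, "1", 1);
		close(fd);
		return 0;
	}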
@@ -1390,33 +1406,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont,
 	case FILE_MEMLIST:
 		s += cpuset_sprintf_memlist(s, cs);
 		break;
-	case FILE_CPU_EXCLUSIVE:
-		*s++ = is_cpu_exclusive(cs) ? '1' : '0';
-		break;
-	case FILE_MEM_EXCLUSIVE:
-		*s++ = is_mem_exclusive(cs) ? '1' : '0';
-		break;
-	case FILE_SCHED_LOAD_BALANCE:
-		*s++ = is_sched_load_balance(cs) ? '1' : '0';
-		break;
 	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
 		s += sprintf(s, "%d", cs->relax_domain_level);
 		break;
-	case FILE_MEMORY_MIGRATE:
-		*s++ = is_memory_migrate(cs) ? '1' : '0';
-		break;
-	case FILE_MEMORY_PRESSURE_ENABLED:
-		*s++ = cpuset_memory_pressure_enabled ? '1' : '0';
-		break;
-	case FILE_MEMORY_PRESSURE:
-		s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter));
-		break;
-	case FILE_SPREAD_PAGE:
-		*s++ = is_spread_page(cs) ? '1' : '0';
-		break;
-	case FILE_SPREAD_SLAB:
-		*s++ = is_spread_slab(cs) ? '1' : '0';
-		break;
 	default:
 		retval = -EINVAL;
 		goto out;
@@ -1429,121 +1421,137 @@ out:
 	return retval;
 }
 
-
-
+static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
+{
+	struct cpuset *cs = cgroup_cs(cont);
+	cpuset_filetype_t type = cft->private;
+	switch (type) {
+	case FILE_CPU_EXCLUSIVE:
+		return is_cpu_exclusive(cs);
+	case FILE_MEM_EXCLUSIVE:
+		return is_mem_exclusive(cs);
+	case FILE_MEM_HARDWALL:
+		return is_mem_hardwall(cs);
+	case FILE_SCHED_LOAD_BALANCE:
+		return is_sched_load_balance(cs);
+	case FILE_MEMORY_MIGRATE:
+		return is_memory_migrate(cs);
+	case FILE_MEMORY_PRESSURE_ENABLED:
+		return cpuset_memory_pressure_enabled;
+	case FILE_MEMORY_PRESSURE:
+		return fmeter_getrate(&cs->fmeter);
+	case FILE_SPREAD_PAGE:
+		return is_spread_page(cs);
+	case FILE_SPREAD_SLAB:
+		return is_spread_slab(cs);
+	default:
+		BUG();
+	}
+}
 
 
 /*
  * for the common functions, 'private' gives the type of file
  */
 
-static struct cftype cft_cpus = {
-	.name = "cpus",
-	.read = cpuset_common_file_read,
-	.write = cpuset_common_file_write,
-	.private = FILE_CPULIST,
-};
-
-static struct cftype cft_mems = {
-	.name = "mems",
-	.read = cpuset_common_file_read,
-	.write = cpuset_common_file_write,
-	.private = FILE_MEMLIST,
-};
-
-static struct cftype cft_cpu_exclusive = {
-	.name = "cpu_exclusive",
-	.read = cpuset_common_file_read,
-	.write = cpuset_common_file_write,
-	.private = FILE_CPU_EXCLUSIVE,
-};
-
-static struct cftype cft_mem_exclusive = {
-	.name = "mem_exclusive",
-	.read = cpuset_common_file_read,
-	.write = cpuset_common_file_write,
-	.private = FILE_MEM_EXCLUSIVE,
-};
-
-static struct cftype cft_sched_load_balance = {
-	.name = "sched_load_balance",
-	.read = cpuset_common_file_read,
-	.write = cpuset_common_file_write,
-	.private = FILE_SCHED_LOAD_BALANCE,
-};
-
-static struct cftype cft_sched_relax_domain_level = {
-	.name = "sched_relax_domain_level",
-	.read = cpuset_common_file_read,
-	.write = cpuset_common_file_write,
-	.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
-};
-
-static struct cftype cft_memory_migrate = {
-	.name = "memory_migrate",
-	.read = cpuset_common_file_read,
-	.write = cpuset_common_file_write,
-	.private = FILE_MEMORY_MIGRATE,
+static struct cftype files[] = {
+	{
+		.name = "cpus",
+		.read = cpuset_common_file_read,
+		.write = cpuset_common_file_write,
+		.private = FILE_CPULIST,
+	},
+
+	{
+		.name = "mems",
+		.read = cpuset_common_file_read,
+		.write = cpuset_common_file_write,
+		.private = FILE_MEMLIST,
+	},
+
+	{
+		.name = "cpu_exclusive",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_CPU_EXCLUSIVE,
+	},
+
+	{
+		.name = "mem_exclusive",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_MEM_EXCLUSIVE,
+	},
+
+	{
+		.name = "mem_hardwall",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_MEM_HARDWALL,
+	},
+
+	{
+		.name = "sched_load_balance",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_SCHED_LOAD_BALANCE,
+	},
+
+	{
+		.name = "sched_relax_domain_level",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
+	},
+
+	{
+		.name = "memory_migrate",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_MEMORY_MIGRATE,
+	},
+
+	{
+		.name = "memory_pressure",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_MEMORY_PRESSURE,
+	},
+
+	{
+		.name = "memory_spread_page",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_SPREAD_PAGE,
+	},
+
+	{
+		.name = "memory_spread_slab",
+		.read_u64 = cpuset_read_u64,
+		.write_u64 = cpuset_write_u64,
+		.private = FILE_SPREAD_SLAB,
+	},
 };
 
 static struct cftype cft_memory_pressure_enabled = {
 	.name = "memory_pressure_enabled",
-	.read = cpuset_common_file_read,
-	.write = cpuset_common_file_write,
+	.read_u64 = cpuset_read_u64,
+	.write_u64 = cpuset_write_u64,
 	.private = FILE_MEMORY_PRESSURE_ENABLED,
 };
 
-static struct cftype cft_memory_pressure = {
-	.name = "memory_pressure",
-	.read = cpuset_common_file_read,
-	.write = cpuset_common_file_write,
-	.private = FILE_MEMORY_PRESSURE,
-};
-
-static struct cftype cft_spread_page = {
-	.name = "memory_spread_page",
-	.read = cpuset_common_file_read,
-	.write = cpuset_common_file_write,
-	.private = FILE_SPREAD_PAGE,
-};
-
-static struct cftype cft_spread_slab = {
-	.name = "memory_spread_slab",
-	.read = cpuset_common_file_read,
-	.write = cpuset_common_file_write,
-	.private = FILE_SPREAD_SLAB,
-};
-
 static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
 {
 	int err;
 
-	if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0)
-		return err;
-	if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0)
-		return err;
-	if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0)
-		return err;
-	if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0)
-		return err;
-	if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0)
-		return err;
-	if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
-		return err;
-	if ((err = cgroup_add_file(cont, ss,
-					&cft_sched_relax_domain_level)) < 0)
-		return err;
-	if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
-		return err;
-	if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
-		return err;
-	if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0)
+	err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+	if (err)
 		return err;
 	/* memory_pressure_enabled is in root cpuset only */
-	if (err == 0 && !cont->parent)
+	if (!cont->parent)
 		err = cgroup_add_file(cont, ss,
 				&cft_memory_pressure_enabled);
-	return 0;
+	return err;
 }
 
 /*
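
A hedged aside on the consolidated table: with a single files[] array registered via cgroup_add_files(), adding another boolean knob reduces to one enum value, one array entry, and one case in each of cpuset_read_u64() and cpuset_write_u64(). A hypothetical entry (the name "example_flag" and FILE_EXAMPLE_FLAG are invented here for illustration; they are not part of the patch) would follow the same pattern:

	{
		.name = "example_flag",
		.read_u64 = cpuset_read_u64,
		.write_u64 = cpuset_write_u64,
		.private = FILE_EXAMPLE_FLAG,	/* hypothetical enum value */
	},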
@@ -1643,7 +1651,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
 	cpuset_update_task_memory_state();
 
 	if (is_sched_load_balance(cs))
-		update_flag(CS_SCHED_LOAD_BALANCE, cs, "0");
+		update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
 
 	number_of_cpusets--;
 	kfree(cs);
@@ -1708,7 +1716,8 @@ int __init cpuset_init(void)
  * Called by cgroup_scan_tasks() for each task in a cgroup.
  * Return nonzero to stop the walk through the tasks.
  */
-void cpuset_do_move_task(struct task_struct *tsk, struct cgroup_scanner *scan)
+static void cpuset_do_move_task(struct task_struct *tsk,
+				struct cgroup_scanner *scan)
 {
 	struct cpuset_hotplug_scanner *chsp;
 
@@ -1970,14 +1979,14 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 }
 
 /*
- * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
- * ancestor to the specified cpuset.  Call holding callback_mutex.
- * If no ancestor is mem_exclusive (an unusual configuration), then
- * returns the root cpuset.
+ * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
+ * mem_hardwall ancestor to the specified cpuset.  Call holding
+ * callback_mutex.  If no ancestor is mem_exclusive or mem_hardwall
+ * (an unusual configuration), then returns the root cpuset.
  */
-static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
+static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs)
 {
-	while (!is_mem_exclusive(cs) && cs->parent)
+	while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent)
 		cs = cs->parent;
 	return cs;
 }
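
To make the walk concrete, here is a self-contained userspace model (plain C, not kernel code; the struct and names are simplified stand-ins for struct cpuset): climb to the parent until a node with either the exclusive or the hardwall bit set is found, falling back to the root.

	#include <stdio.h>
	#include <stdbool.h>

	/* Simplified stand-in for the kernel's struct cpuset. */
	struct toy_cpuset {
		const char *name;
		bool mem_exclusive;
		bool mem_hardwall;
		struct toy_cpuset *parent;	/* NULL at the root */
	};

	/* Same loop as nearest_hardwall_ancestor(), modeled in userspace. */
	static struct toy_cpuset *nearest_hardwall(struct toy_cpuset *cs)
	{
		while (!(cs->mem_exclusive || cs->mem_hardwall) && cs->parent)
			cs = cs->parent;
		return cs;
	}

	int main(void)
	{
		struct toy_cpuset root = { "root", false, false, NULL  };
		struct toy_cpuset mid  = { "mid",  false, true,  &root };
		struct toy_cpuset leaf = { "leaf", false, false, &mid  };

		/* "mid" is hardwalled, so the walk from "leaf" stops there. */
		printf("%s\n", nearest_hardwall(&leaf)->name);	/* prints "mid" */
		return 0;
	}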
@@ -1991,7 +2000,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * __GFP_THISNODE is set, yes, we can always allocate.  If zone
  * z's node is in our tasks mems_allowed, yes.  If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
- * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * hardwalled cpuset ancestor to this tasks cpuset, yes.
  * If the task has been OOM killed and has access to memory reserves
  * as specified by the TIF_MEMDIE flag, yes.
  * Otherwise, no.
@@ -2014,7 +2023,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * and do not allow allocations outside the current tasks cpuset
  * unless the task has been OOM killed as is marked TIF_MEMDIE.
  * GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest enclosing mem_exclusive ancestor cpuset.
+ * nearest enclosing hardwalled ancestor cpuset.
  *
  * Scanning up parent cpusets requires callback_mutex.  The
  * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
@@ -2037,7 +2046,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * in_interrupt - any node ok (current task context irrelevant)
  * GFP_ATOMIC   - any node ok
  * TIF_MEMDIE   - any node ok
- * GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
+ * GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
  * GFP_USER     - only nodes in current tasks mems allowed ok.
  *
  * Rule:
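
As a hedged illustration of those rules (not part of the patch; kmalloc() and alloc_page() are standard kernel APIs, shown here only to contrast the flags): a GFP_KERNEL allocation may fall back to any node in the nearest hardwalled ancestor, while GFP_USER carries __GFP_HARDWALL and so stays within the task's own mems_allowed.

	/* May spill to nodes of the nearest hardwalled
	 * (mem_exclusive or mem_hardwall) ancestor cpuset. */
	buf = kmalloc(size, GFP_KERNEL);

	/* GFP_USER includes __GFP_HARDWALL, so this is confined
	 * to current->mems_allowed. */
	page = alloc_page(GFP_USER);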
@@ -2074,7 +2083,7 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
 	mutex_lock(&callback_mutex);
 
 	task_lock(current);
-	cs = nearest_exclusive_ancestor(task_cs(current));
+	cs = nearest_hardwall_ancestor(task_cs(current));
 	task_unlock(current);
 
 	allowed = node_isset(node, cs->mems_allowed);