Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--	kernel/cpuset.c	355
1 file changed, 182 insertions(+), 173 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 48a976c52cf5..8da627d33804 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -127,6 +127,7 @@ struct cpuset_hotplug_scanner {
 typedef enum {
         CS_CPU_EXCLUSIVE,
         CS_MEM_EXCLUSIVE,
+        CS_MEM_HARDWALL,
         CS_MEMORY_MIGRATE,
         CS_SCHED_LOAD_BALANCE,
         CS_SPREAD_PAGE,
@@ -144,6 +145,11 @@ static inline int is_mem_exclusive(const struct cpuset *cs)
         return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
 }
 
+static inline int is_mem_hardwall(const struct cpuset *cs)
+{
+        return test_bit(CS_MEM_HARDWALL, &cs->flags);
+}
+
 static inline int is_sched_load_balance(const struct cpuset *cs)
 {
         return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
@@ -735,7 +741,8 @@ static inline int started_after(void *p1, void *p2)
  * Return nonzero if this tasks's cpus_allowed mask should be changed (in other
  * words, if its mask is not equal to its cpuset's mask).
  */
-int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
+static int cpuset_test_cpumask(struct task_struct *tsk,
+                               struct cgroup_scanner *scan)
 {
         return !cpus_equal(tsk->cpus_allowed,
                         (cgroup_cs(scan->cg))->cpus_allowed);
@@ -752,7 +759,8 @@ int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
  * We don't need to re-check for the cgroup/cpuset membership, since we're
  * holding cgroup_lock() at this point.
  */
-void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
+static void cpuset_change_cpumask(struct task_struct *tsk,
+                                  struct cgroup_scanner *scan)
 {
         set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
 }
@@ -1023,19 +1031,6 @@ int current_cpuset_is_being_rebound(void)
         return task_cs(current) == cpuset_being_rebound;
 }
 
-/*
- * Call with cgroup_mutex held.
- */
-
-static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
-{
-        if (simple_strtoul(buf, NULL, 10) != 0)
-                cpuset_memory_pressure_enabled = 1;
-        else
-                cpuset_memory_pressure_enabled = 0;
-        return 0;
-}
-
 static int update_relax_domain_level(struct cpuset *cs, char *buf)
 {
         int val = simple_strtol(buf, NULL, 10);
@@ -1053,25 +1048,20 @@ static int update_relax_domain_level(struct cpuset *cs, char *buf)
 
 /*
  * update_flag - read a 0 or a 1 in a file and update associated flag
- * bit:	the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
- *				CS_SCHED_LOAD_BALANCE,
- *				CS_NOTIFY_ON_RELEASE, CS_MEMORY_MIGRATE,
- *				CS_SPREAD_PAGE, CS_SPREAD_SLAB)
- * cs:	the cpuset to update
- * buf:	the buffer where we read the 0 or 1
+ * bit:	the bit to update (see cpuset_flagbits_t)
+ * cs:	the cpuset to update
+ * turning_on: 	whether the flag is being set or cleared
  *
  * Call with cgroup_mutex held.
  */
 
-static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
+static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
+                       int turning_on)
 {
-        int turning_on;
         struct cpuset trialcs;
         int err;
         int cpus_nonempty, balance_flag_changed;
 
-        turning_on = (simple_strtoul(buf, NULL, 10) != 0);
-
         trialcs = *cs;
         if (turning_on)
                 set_bit(bit, &trialcs.flags);
@@ -1241,6 +1231,7 @@ typedef enum {
         FILE_MEMLIST,
         FILE_CPU_EXCLUSIVE,
         FILE_MEM_EXCLUSIVE,
+        FILE_MEM_HARDWALL,
         FILE_SCHED_LOAD_BALANCE,
         FILE_SCHED_RELAX_DOMAIN_LEVEL,
         FILE_MEMORY_PRESSURE_ENABLED,
@@ -1289,46 +1280,71 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
         case FILE_MEMLIST:
                 retval = update_nodemask(cs, buffer);
                 break;
+        case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+                retval = update_relax_domain_level(cs, buffer);
+                break;
+        default:
+                retval = -EINVAL;
+                goto out2;
+        }
+
+        if (retval == 0)
+                retval = nbytes;
+out2:
+        cgroup_unlock();
+out1:
+        kfree(buffer);
+        return retval;
+}
+
+static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
+{
+        int retval = 0;
+        struct cpuset *cs = cgroup_cs(cgrp);
+        cpuset_filetype_t type = cft->private;
+
+        cgroup_lock();
+
+        if (cgroup_is_removed(cgrp)) {
+                cgroup_unlock();
+                return -ENODEV;
+        }
+
+        switch (type) {
         case FILE_CPU_EXCLUSIVE:
-                retval = update_flag(CS_CPU_EXCLUSIVE, cs, buffer);
+                retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
                 break;
         case FILE_MEM_EXCLUSIVE:
-                retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
+                retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
                 break;
-        case FILE_SCHED_LOAD_BALANCE:
-                retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer);
+        case FILE_MEM_HARDWALL:
+                retval = update_flag(CS_MEM_HARDWALL, cs, val);
                 break;
-        case FILE_SCHED_RELAX_DOMAIN_LEVEL:
-                retval = update_relax_domain_level(cs, buffer);
+        case FILE_SCHED_LOAD_BALANCE:
+                retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
                 break;
         case FILE_MEMORY_MIGRATE:
-                retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
+                retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
                 break;
         case FILE_MEMORY_PRESSURE_ENABLED:
-                retval = update_memory_pressure_enabled(cs, buffer);
+                cpuset_memory_pressure_enabled = !!val;
                 break;
         case FILE_MEMORY_PRESSURE:
                 retval = -EACCES;
                 break;
         case FILE_SPREAD_PAGE:
-                retval = update_flag(CS_SPREAD_PAGE, cs, buffer);
+                retval = update_flag(CS_SPREAD_PAGE, cs, val);
                 cs->mems_generation = cpuset_mems_generation++;
                 break;
         case FILE_SPREAD_SLAB:
-                retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
+                retval = update_flag(CS_SPREAD_SLAB, cs, val);
                 cs->mems_generation = cpuset_mems_generation++;
                 break;
         default:
                 retval = -EINVAL;
-                goto out2;
+                break;
         }
-
-        if (retval == 0)
-                retval = nbytes;
-out2:
         cgroup_unlock();
-out1:
-        kfree(buffer);
         return retval;
 }
 
@@ -1390,33 +1406,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont,
         case FILE_MEMLIST:
                 s += cpuset_sprintf_memlist(s, cs);
                 break;
-        case FILE_CPU_EXCLUSIVE:
-                *s++ = is_cpu_exclusive(cs) ? '1' : '0';
-                break;
-        case FILE_MEM_EXCLUSIVE:
-                *s++ = is_mem_exclusive(cs) ? '1' : '0';
-                break;
-        case FILE_SCHED_LOAD_BALANCE:
-                *s++ = is_sched_load_balance(cs) ? '1' : '0';
-                break;
         case FILE_SCHED_RELAX_DOMAIN_LEVEL:
                 s += sprintf(s, "%d", cs->relax_domain_level);
                 break;
-        case FILE_MEMORY_MIGRATE:
-                *s++ = is_memory_migrate(cs) ? '1' : '0';
-                break;
-        case FILE_MEMORY_PRESSURE_ENABLED:
-                *s++ = cpuset_memory_pressure_enabled ? '1' : '0';
-                break;
-        case FILE_MEMORY_PRESSURE:
-                s += sprintf(s, "%d", fmeter_getrate(&cs->fmeter));
-                break;
-        case FILE_SPREAD_PAGE:
-                *s++ = is_spread_page(cs) ? '1' : '0';
-                break;
-        case FILE_SPREAD_SLAB:
-                *s++ = is_spread_slab(cs) ? '1' : '0';
-                break;
         default:
                 retval = -EINVAL;
                 goto out;
@@ -1429,121 +1421,137 @@ out:
         return retval;
 }
 
-
-
+static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
+{
+        struct cpuset *cs = cgroup_cs(cont);
+        cpuset_filetype_t type = cft->private;
+        switch (type) {
+        case FILE_CPU_EXCLUSIVE:
+                return is_cpu_exclusive(cs);
+        case FILE_MEM_EXCLUSIVE:
+                return is_mem_exclusive(cs);
+        case FILE_MEM_HARDWALL:
+                return is_mem_hardwall(cs);
+        case FILE_SCHED_LOAD_BALANCE:
+                return is_sched_load_balance(cs);
+        case FILE_MEMORY_MIGRATE:
+                return is_memory_migrate(cs);
+        case FILE_MEMORY_PRESSURE_ENABLED:
+                return cpuset_memory_pressure_enabled;
+        case FILE_MEMORY_PRESSURE:
+                return fmeter_getrate(&cs->fmeter);
+        case FILE_SPREAD_PAGE:
+                return is_spread_page(cs);
+        case FILE_SPREAD_SLAB:
+                return is_spread_slab(cs);
+        default:
+                BUG();
+        }
+}
 
 
 /*
  * for the common functions, 'private' gives the type of file
  */
 
-static struct cftype cft_cpus = {
-        .name = "cpus",
-        .read = cpuset_common_file_read,
-        .write = cpuset_common_file_write,
-        .private = FILE_CPULIST,
-};
-
-static struct cftype cft_mems = {
-        .name = "mems",
-        .read = cpuset_common_file_read,
-        .write = cpuset_common_file_write,
-        .private = FILE_MEMLIST,
-};
-
-static struct cftype cft_cpu_exclusive = {
-        .name = "cpu_exclusive",
-        .read = cpuset_common_file_read,
-        .write = cpuset_common_file_write,
-        .private = FILE_CPU_EXCLUSIVE,
-};
-
-static struct cftype cft_mem_exclusive = {
-        .name = "mem_exclusive",
-        .read = cpuset_common_file_read,
-        .write = cpuset_common_file_write,
-        .private = FILE_MEM_EXCLUSIVE,
-};
-
-static struct cftype cft_sched_load_balance = {
-        .name = "sched_load_balance",
-        .read = cpuset_common_file_read,
-        .write = cpuset_common_file_write,
-        .private = FILE_SCHED_LOAD_BALANCE,
-};
-
-static struct cftype cft_sched_relax_domain_level = {
-        .name = "sched_relax_domain_level",
-        .read = cpuset_common_file_read,
-        .write = cpuset_common_file_write,
-        .private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
-};
-
-static struct cftype cft_memory_migrate = {
-        .name = "memory_migrate",
-        .read = cpuset_common_file_read,
-        .write = cpuset_common_file_write,
-        .private = FILE_MEMORY_MIGRATE,
+static struct cftype files[] = {
+        {
+                .name = "cpus",
+                .read = cpuset_common_file_read,
+                .write = cpuset_common_file_write,
+                .private = FILE_CPULIST,
+        },
+
+        {
+                .name = "mems",
+                .read = cpuset_common_file_read,
+                .write = cpuset_common_file_write,
+                .private = FILE_MEMLIST,
+        },
+
+        {
+                .name = "cpu_exclusive",
+                .read_u64 = cpuset_read_u64,
+                .write_u64 = cpuset_write_u64,
+                .private = FILE_CPU_EXCLUSIVE,
+        },
+
+        {
+                .name = "mem_exclusive",
+                .read_u64 = cpuset_read_u64,
+                .write_u64 = cpuset_write_u64,
+                .private = FILE_MEM_EXCLUSIVE,
+        },
+
+        {
+                .name = "mem_hardwall",
+                .read_u64 = cpuset_read_u64,
+                .write_u64 = cpuset_write_u64,
+                .private = FILE_MEM_HARDWALL,
+        },
+
+        {
+                .name = "sched_load_balance",
+                .read_u64 = cpuset_read_u64,
+                .write_u64 = cpuset_write_u64,
+                .private = FILE_SCHED_LOAD_BALANCE,
+        },
+
+        {
+                .name = "sched_relax_domain_level",
+                .read_u64 = cpuset_read_u64,
+                .write_u64 = cpuset_write_u64,
+                .private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
+        },
+
+        {
+                .name = "memory_migrate",
+                .read_u64 = cpuset_read_u64,
+                .write_u64 = cpuset_write_u64,
+                .private = FILE_MEMORY_MIGRATE,
+        },
+
+        {
+                .name = "memory_pressure",
+                .read_u64 = cpuset_read_u64,
+                .write_u64 = cpuset_write_u64,
+                .private = FILE_MEMORY_PRESSURE,
+        },
+
+        {
+                .name = "memory_spread_page",
+                .read_u64 = cpuset_read_u64,
+                .write_u64 = cpuset_write_u64,
+                .private = FILE_SPREAD_PAGE,
+        },
+
+        {
+                .name = "memory_spread_slab",
+                .read_u64 = cpuset_read_u64,
+                .write_u64 = cpuset_write_u64,
+                .private = FILE_SPREAD_SLAB,
+        },
 };
 
 static struct cftype cft_memory_pressure_enabled = {
         .name = "memory_pressure_enabled",
-        .read = cpuset_common_file_read,
-        .write = cpuset_common_file_write,
+        .read_u64 = cpuset_read_u64,
+        .write_u64 = cpuset_write_u64,
         .private = FILE_MEMORY_PRESSURE_ENABLED,
 };
 
-static struct cftype cft_memory_pressure = {
-        .name = "memory_pressure",
-        .read = cpuset_common_file_read,
-        .write = cpuset_common_file_write,
-        .private = FILE_MEMORY_PRESSURE,
-};
-
-static struct cftype cft_spread_page = {
-        .name = "memory_spread_page",
-        .read = cpuset_common_file_read,
-        .write = cpuset_common_file_write,
-        .private = FILE_SPREAD_PAGE,
-};
-
-static struct cftype cft_spread_slab = {
-        .name = "memory_spread_slab",
-        .read = cpuset_common_file_read,
-        .write = cpuset_common_file_write,
-        .private = FILE_SPREAD_SLAB,
-};
-
 static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
 {
         int err;
 
-        if ((err = cgroup_add_file(cont, ss, &cft_cpus)) < 0)
-                return err;
-        if ((err = cgroup_add_file(cont, ss, &cft_mems)) < 0)
-                return err;
-        if ((err = cgroup_add_file(cont, ss, &cft_cpu_exclusive)) < 0)
-                return err;
-        if ((err = cgroup_add_file(cont, ss, &cft_mem_exclusive)) < 0)
-                return err;
-        if ((err = cgroup_add_file(cont, ss, &cft_memory_migrate)) < 0)
-                return err;
-        if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
-                return err;
-        if ((err = cgroup_add_file(cont, ss,
-                                        &cft_sched_relax_domain_level)) < 0)
-                return err;
-        if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
-                return err;
-        if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
-                return err;
-        if ((err = cgroup_add_file(cont, ss, &cft_spread_slab)) < 0)
+        err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+        if (err)
                 return err;
         /* memory_pressure_enabled is in root cpuset only */
-        if (err == 0 && !cont->parent)
+        if (!cont->parent)
                 err = cgroup_add_file(cont, ss,
                                         &cft_memory_pressure_enabled);
-        return 0;
+        return err;
 }
 
 /*
@@ -1643,7 +1651,7 @@ static void cpuset_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
         cpuset_update_task_memory_state();
 
         if (is_sched_load_balance(cs))
-                update_flag(CS_SCHED_LOAD_BALANCE, cs, "0");
+                update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
 
         number_of_cpusets--;
         kfree(cs);
@@ -1708,7 +1716,8 @@ int __init cpuset_init(void)
  * Called by cgroup_scan_tasks() for each task in a cgroup.
  * Return nonzero to stop the walk through the tasks.
  */
-void cpuset_do_move_task(struct task_struct *tsk, struct cgroup_scanner *scan)
+static void cpuset_do_move_task(struct task_struct *tsk,
+                                struct cgroup_scanner *scan)
 {
         struct cpuset_hotplug_scanner *chsp;
 
@@ -1970,14 +1979,14 @@ int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 }
 
 /*
- * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive
- * ancestor to the specified cpuset.  Call holding callback_mutex.
- * If no ancestor is mem_exclusive (an unusual configuration), then
- * returns the root cpuset.
+ * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
+ * mem_hardwall ancestor to the specified cpuset.  Call holding
+ * callback_mutex.  If no ancestor is mem_exclusive or mem_hardwall
+ * (an unusual configuration), then returns the root cpuset.
  */
-static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
+static const struct cpuset *nearest_hardwall_ancestor(const struct cpuset *cs)
 {
-        while (!is_mem_exclusive(cs) && cs->parent)
+        while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && cs->parent)
                 cs = cs->parent;
         return cs;
 }
@@ -1991,7 +2000,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * __GFP_THISNODE is set, yes, we can always allocate.  If zone
  * z's node is in our tasks mems_allowed, yes.  If it's not a
  * __GFP_HARDWALL request and this zone's nodes is in the nearest
- * mem_exclusive cpuset ancestor to this tasks cpuset, yes.
+ * hardwalled cpuset ancestor to this tasks cpuset, yes.
  * If the task has been OOM killed and has access to memory reserves
  * as specified by the TIF_MEMDIE flag, yes.
  * Otherwise, no.
@@ -2014,7 +2023,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * and do not allow allocations outside the current tasks cpuset
  * unless the task has been OOM killed as is marked TIF_MEMDIE.
  * GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest enclosing mem_exclusive ancestor cpuset.
+ * nearest enclosing hardwalled ancestor cpuset.
  *
  * Scanning up parent cpusets requires callback_mutex.  The
  * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
@@ -2037,7 +2046,7 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
  * in_interrupt - any node ok (current task context irrelevant)
  * GFP_ATOMIC   - any node ok
  * TIF_MEMDIE   - any node ok
- * GFP_KERNEL   - any node in enclosing mem_exclusive cpuset ok
+ * GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
  * GFP_USER     - only nodes in current tasks mems allowed ok.
  *
  * Rule:
@@ -2074,7 +2083,7 @@ int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
         mutex_lock(&callback_mutex);
 
         task_lock(current);
-        cs = nearest_hardwall_ancestor(task_cs(current));
+        cs = nearest_hardwall_ancestor(task_cs(current));
         task_unlock(current);
 
         allowed = node_isset(node, cs->mems_allowed);
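
Taken together, the patch swaps cpuset's hand-rolled "0"/"1" string parsing for the cgroup core's typed read_u64/write_u64 handlers and exposes the new mem_hardwall flag as a per-cpuset boolean file. A minimal userspace sketch of toggling that flag is below; it is not part of the patch, the mount point and group name are hypothetical, and the file may appear with a "cpuset." prefix depending on how the hierarchy is mounted:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* hypothetical cpuset group; adjust to your mount and hierarchy */
        const char *path = "/dev/cpuset/mygroup/mem_hardwall";
        int fd = open(path, O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* the cgroup core parses "1" and hands it to cpuset_write_u64(),
         * which sets CS_MEM_HARDWALL via update_flag() */
        if (write(fd, "1", 1) != 1)
                perror("write");
        return close(fd) ? 1 : 0;
}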