diff options
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 100 |
1 files changed, 80 insertions, 20 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index a1b61f414228..8b35fbd8292f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -98,6 +98,9 @@ struct cpuset { | |||
98 | /* partition number for rebuild_sched_domains() */ | 98 | /* partition number for rebuild_sched_domains() */ |
99 | int pn; | 99 | int pn; |
100 | 100 | ||
101 | /* for custom sched domain */ | ||
102 | int relax_domain_level; | ||
103 | |||
101 | /* used for walking a cpuset heirarchy */ | 104 | /* used for walking a cpuset heirarchy */ |
102 | struct list_head stack_list; | 105 | struct list_head stack_list; |
103 | }; | 106 | }; |
@@ -478,6 +481,16 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) | |||
478 | return cpus_intersects(a->cpus_allowed, b->cpus_allowed); | 481 | return cpus_intersects(a->cpus_allowed, b->cpus_allowed); |
479 | } | 482 | } |
480 | 483 | ||
484 | static void | ||
485 | update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) | ||
486 | { | ||
487 | if (!dattr) | ||
488 | return; | ||
489 | if (dattr->relax_domain_level < c->relax_domain_level) | ||
490 | dattr->relax_domain_level = c->relax_domain_level; | ||
491 | return; | ||
492 | } | ||
493 | |||
481 | /* | 494 | /* |
482 | * rebuild_sched_domains() | 495 | * rebuild_sched_domains() |
483 | * | 496 | * |
@@ -553,12 +566,14 @@ static void rebuild_sched_domains(void) | |||
553 | int csn; /* how many cpuset ptrs in csa so far */ | 566 | int csn; /* how many cpuset ptrs in csa so far */ |
554 | int i, j, k; /* indices for partition finding loops */ | 567 | int i, j, k; /* indices for partition finding loops */ |
555 | cpumask_t *doms; /* resulting partition; i.e. sched domains */ | 568 | cpumask_t *doms; /* resulting partition; i.e. sched domains */ |
569 | struct sched_domain_attr *dattr; /* attributes for custom domains */ | ||
556 | int ndoms; /* number of sched domains in result */ | 570 | int ndoms; /* number of sched domains in result */ |
557 | int nslot; /* next empty doms[] cpumask_t slot */ | 571 | int nslot; /* next empty doms[] cpumask_t slot */ |
558 | 572 | ||
559 | q = NULL; | 573 | q = NULL; |
560 | csa = NULL; | 574 | csa = NULL; |
561 | doms = NULL; | 575 | doms = NULL; |
576 | dattr = NULL; | ||
562 | 577 | ||
563 | /* Special case for the 99% of systems with one, full, sched domain */ | 578 | /* Special case for the 99% of systems with one, full, sched domain */ |
564 | if (is_sched_load_balance(&top_cpuset)) { | 579 | if (is_sched_load_balance(&top_cpuset)) { |
@@ -566,6 +581,11 @@ static void rebuild_sched_domains(void) | |||
566 | doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); | 581 | doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); |
567 | if (!doms) | 582 | if (!doms) |
568 | goto rebuild; | 583 | goto rebuild; |
584 | dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); | ||
585 | if (dattr) { | ||
586 | *dattr = SD_ATTR_INIT; | ||
587 | update_domain_attr(dattr, &top_cpuset); | ||
588 | } | ||
569 | *doms = top_cpuset.cpus_allowed; | 589 | *doms = top_cpuset.cpus_allowed; |
570 | goto rebuild; | 590 | goto rebuild; |
571 | } | 591 | } |
@@ -622,6 +642,7 @@ restart: | |||
622 | doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); | 642 | doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); |
623 | if (!doms) | 643 | if (!doms) |
624 | goto rebuild; | 644 | goto rebuild; |
645 | dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL); | ||
625 | 646 | ||
626 | for (nslot = 0, i = 0; i < csn; i++) { | 647 | for (nslot = 0, i = 0; i < csn; i++) { |
627 | struct cpuset *a = csa[i]; | 648 | struct cpuset *a = csa[i]; |
@@ -644,12 +665,15 @@ restart: | |||
644 | } | 665 | } |
645 | 666 | ||
646 | cpus_clear(*dp); | 667 | cpus_clear(*dp); |
668 | if (dattr) | ||
669 | *(dattr + nslot) = SD_ATTR_INIT; | ||
647 | for (j = i; j < csn; j++) { | 670 | for (j = i; j < csn; j++) { |
648 | struct cpuset *b = csa[j]; | 671 | struct cpuset *b = csa[j]; |
649 | 672 | ||
650 | if (apn == b->pn) { | 673 | if (apn == b->pn) { |
651 | cpus_or(*dp, *dp, b->cpus_allowed); | 674 | cpus_or(*dp, *dp, b->cpus_allowed); |
652 | b->pn = -1; | 675 | b->pn = -1; |
676 | update_domain_attr(dattr, b); | ||
653 | } | 677 | } |
654 | } | 678 | } |
655 | nslot++; | 679 | nslot++; |
@@ -660,7 +684,7 @@ restart: | |||
660 | rebuild: | 684 | rebuild: |
661 | /* Have scheduler rebuild sched domains */ | 685 | /* Have scheduler rebuild sched domains */ |
662 | get_online_cpus(); | 686 | get_online_cpus(); |
663 | partition_sched_domains(ndoms, doms); | 687 | partition_sched_domains(ndoms, doms, dattr); |
664 | put_online_cpus(); | 688 | put_online_cpus(); |
665 | 689 | ||
666 | done: | 690 | done: |
@@ -668,6 +692,7 @@ done: | |||
668 | kfifo_free(q); | 692 | kfifo_free(q); |
669 | kfree(csa); | 693 | kfree(csa); |
670 | /* Don't kfree(doms) -- partition_sched_domains() does that. */ | 694 | /* Don't kfree(doms) -- partition_sched_domains() does that. */ |
695 | /* Don't kfree(dattr) -- partition_sched_domains() does that. */ | ||
671 | } | 696 | } |
672 | 697 | ||
673 | static inline int started_after_time(struct task_struct *t1, | 698 | static inline int started_after_time(struct task_struct *t1, |
@@ -729,7 +754,7 @@ int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) | |||
729 | */ | 754 | */ |
730 | void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) | 755 | void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) |
731 | { | 756 | { |
732 | set_cpus_allowed(tsk, (cgroup_cs(scan->cg))->cpus_allowed); | 757 | set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed)); |
733 | } | 758 | } |
734 | 759 | ||
735 | /** | 760 | /** |
@@ -1011,6 +1036,21 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) | |||
1011 | return 0; | 1036 | return 0; |
1012 | } | 1037 | } |
1013 | 1038 | ||
1039 | static int update_relax_domain_level(struct cpuset *cs, char *buf) | ||
1040 | { | ||
1041 | int val = simple_strtol(buf, NULL, 10); | ||
1042 | |||
1043 | if (val < 0) | ||
1044 | val = -1; | ||
1045 | |||
1046 | if (val != cs->relax_domain_level) { | ||
1047 | cs->relax_domain_level = val; | ||
1048 | rebuild_sched_domains(); | ||
1049 | } | ||
1050 | |||
1051 | return 0; | ||
1052 | } | ||
1053 | |||
1014 | /* | 1054 | /* |
1015 | * update_flag - read a 0 or a 1 in a file and update associated flag | 1055 | * update_flag - read a 0 or a 1 in a file and update associated flag |
1016 | * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, | 1056 | * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, |
@@ -1178,7 +1218,7 @@ static void cpuset_attach(struct cgroup_subsys *ss, | |||
1178 | 1218 | ||
1179 | mutex_lock(&callback_mutex); | 1219 | mutex_lock(&callback_mutex); |
1180 | guarantee_online_cpus(cs, &cpus); | 1220 | guarantee_online_cpus(cs, &cpus); |
1181 | set_cpus_allowed(tsk, cpus); | 1221 | set_cpus_allowed_ptr(tsk, &cpus); |
1182 | mutex_unlock(&callback_mutex); | 1222 | mutex_unlock(&callback_mutex); |
1183 | 1223 | ||
1184 | from = oldcs->mems_allowed; | 1224 | from = oldcs->mems_allowed; |
@@ -1202,6 +1242,7 @@ typedef enum { | |||
1202 | FILE_CPU_EXCLUSIVE, | 1242 | FILE_CPU_EXCLUSIVE, |
1203 | FILE_MEM_EXCLUSIVE, | 1243 | FILE_MEM_EXCLUSIVE, |
1204 | FILE_SCHED_LOAD_BALANCE, | 1244 | FILE_SCHED_LOAD_BALANCE, |
1245 | FILE_SCHED_RELAX_DOMAIN_LEVEL, | ||
1205 | FILE_MEMORY_PRESSURE_ENABLED, | 1246 | FILE_MEMORY_PRESSURE_ENABLED, |
1206 | FILE_MEMORY_PRESSURE, | 1247 | FILE_MEMORY_PRESSURE, |
1207 | FILE_SPREAD_PAGE, | 1248 | FILE_SPREAD_PAGE, |
@@ -1256,6 +1297,9 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont, | |||
1256 | case FILE_SCHED_LOAD_BALANCE: | 1297 | case FILE_SCHED_LOAD_BALANCE: |
1257 | retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer); | 1298 | retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer); |
1258 | break; | 1299 | break; |
1300 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | ||
1301 | retval = update_relax_domain_level(cs, buffer); | ||
1302 | break; | ||
1259 | case FILE_MEMORY_MIGRATE: | 1303 | case FILE_MEMORY_MIGRATE: |
1260 | retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); | 1304 | retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); |
1261 | break; | 1305 | break; |
@@ -1354,6 +1398,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont, | |||
1354 | case FILE_SCHED_LOAD_BALANCE: | 1398 | case FILE_SCHED_LOAD_BALANCE: |
1355 | *s++ = is_sched_load_balance(cs) ? '1' : '0'; | 1399 | *s++ = is_sched_load_balance(cs) ? '1' : '0'; |
1356 | break; | 1400 | break; |
1401 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | ||
1402 | s += sprintf(s, "%d", cs->relax_domain_level); | ||
1403 | break; | ||
1357 | case FILE_MEMORY_MIGRATE: | 1404 | case FILE_MEMORY_MIGRATE: |
1358 | *s++ = is_memory_migrate(cs) ? '1' : '0'; | 1405 | *s++ = is_memory_migrate(cs) ? '1' : '0'; |
1359 | break; | 1406 | break; |
@@ -1424,6 +1471,13 @@ static struct cftype cft_sched_load_balance = { | |||
1424 | .private = FILE_SCHED_LOAD_BALANCE, | 1471 | .private = FILE_SCHED_LOAD_BALANCE, |
1425 | }; | 1472 | }; |
1426 | 1473 | ||
1474 | static struct cftype cft_sched_relax_domain_level = { | ||
1475 | .name = "sched_relax_domain_level", | ||
1476 | .read = cpuset_common_file_read, | ||
1477 | .write = cpuset_common_file_write, | ||
1478 | .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, | ||
1479 | }; | ||
1480 | |||
1427 | static struct cftype cft_memory_migrate = { | 1481 | static struct cftype cft_memory_migrate = { |
1428 | .name = "memory_migrate", | 1482 | .name = "memory_migrate", |
1429 | .read = cpuset_common_file_read, | 1483 | .read = cpuset_common_file_read, |
@@ -1475,6 +1529,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) | |||
1475 | return err; | 1529 | return err; |
1476 | if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0) | 1530 | if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0) |
1477 | return err; | 1531 | return err; |
1532 | if ((err = cgroup_add_file(cont, ss, | ||
1533 | &cft_sched_relax_domain_level)) < 0) | ||
1534 | return err; | ||
1478 | if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0) | 1535 | if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0) |
1479 | return err; | 1536 | return err; |
1480 | if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0) | 1537 | if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0) |
@@ -1555,10 +1612,11 @@ static struct cgroup_subsys_state *cpuset_create( | |||
1555 | if (is_spread_slab(parent)) | 1612 | if (is_spread_slab(parent)) |
1556 | set_bit(CS_SPREAD_SLAB, &cs->flags); | 1613 | set_bit(CS_SPREAD_SLAB, &cs->flags); |
1557 | set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); | 1614 | set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); |
1558 | cs->cpus_allowed = CPU_MASK_NONE; | 1615 | cpus_clear(cs->cpus_allowed); |
1559 | cs->mems_allowed = NODE_MASK_NONE; | 1616 | nodes_clear(cs->mems_allowed); |
1560 | cs->mems_generation = cpuset_mems_generation++; | 1617 | cs->mems_generation = cpuset_mems_generation++; |
1561 | fmeter_init(&cs->fmeter); | 1618 | fmeter_init(&cs->fmeter); |
1619 | cs->relax_domain_level = -1; | ||
1562 | 1620 | ||
1563 | cs->parent = parent; | 1621 | cs->parent = parent; |
1564 | number_of_cpusets++; | 1622 | number_of_cpusets++; |
@@ -1625,12 +1683,13 @@ int __init cpuset_init(void) | |||
1625 | { | 1683 | { |
1626 | int err = 0; | 1684 | int err = 0; |
1627 | 1685 | ||
1628 | top_cpuset.cpus_allowed = CPU_MASK_ALL; | 1686 | cpus_setall(top_cpuset.cpus_allowed); |
1629 | top_cpuset.mems_allowed = NODE_MASK_ALL; | 1687 | nodes_setall(top_cpuset.mems_allowed); |
1630 | 1688 | ||
1631 | fmeter_init(&top_cpuset.fmeter); | 1689 | fmeter_init(&top_cpuset.fmeter); |
1632 | top_cpuset.mems_generation = cpuset_mems_generation++; | 1690 | top_cpuset.mems_generation = cpuset_mems_generation++; |
1633 | set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); | 1691 | set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); |
1692 | top_cpuset.relax_domain_level = -1; | ||
1634 | 1693 | ||
1635 | err = register_filesystem(&cpuset_fs_type); | 1694 | err = register_filesystem(&cpuset_fs_type); |
1636 | if (err < 0) | 1695 | if (err < 0) |
@@ -1844,6 +1903,7 @@ void __init cpuset_init_smp(void) | |||
1844 | 1903 | ||
1845 | * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. | 1904 | * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. |
1846 | * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. | 1905 | * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. |
1906 | * @pmask: pointer to cpumask_t variable to receive cpus_allowed set. | ||
1847 | * | 1907 | * |
1848 | * Description: Returns the cpumask_t cpus_allowed of the cpuset | 1908 | * Description: Returns the cpumask_t cpus_allowed of the cpuset |
1849 | * attached to the specified @tsk. Guaranteed to return some non-empty | 1909 | * attached to the specified @tsk. Guaranteed to return some non-empty |
@@ -1851,35 +1911,27 @@ void __init cpuset_init_smp(void) | |||
1851 | * tasks cpuset. | 1911 | * tasks cpuset. |
1852 | **/ | 1912 | **/ |
1853 | 1913 | ||
1854 | cpumask_t cpuset_cpus_allowed(struct task_struct *tsk) | 1914 | void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask) |
1855 | { | 1915 | { |
1856 | cpumask_t mask; | ||
1857 | |||
1858 | mutex_lock(&callback_mutex); | 1916 | mutex_lock(&callback_mutex); |
1859 | mask = cpuset_cpus_allowed_locked(tsk); | 1917 | cpuset_cpus_allowed_locked(tsk, pmask); |
1860 | mutex_unlock(&callback_mutex); | 1918 | mutex_unlock(&callback_mutex); |
1861 | |||
1862 | return mask; | ||
1863 | } | 1919 | } |
1864 | 1920 | ||
1865 | /** | 1921 | /** |
1866 | * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. | 1922 | * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. |
1867 | * Must be called with callback_mutex held. | 1923 | * Must be called with callback_mutex held. |
1868 | **/ | 1924 | **/ |
1869 | cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk) | 1925 | void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask) |
1870 | { | 1926 | { |
1871 | cpumask_t mask; | ||
1872 | |||
1873 | task_lock(tsk); | 1927 | task_lock(tsk); |
1874 | guarantee_online_cpus(task_cs(tsk), &mask); | 1928 | guarantee_online_cpus(task_cs(tsk), pmask); |
1875 | task_unlock(tsk); | 1929 | task_unlock(tsk); |
1876 | |||
1877 | return mask; | ||
1878 | } | 1930 | } |
1879 | 1931 | ||
1880 | void cpuset_init_current_mems_allowed(void) | 1932 | void cpuset_init_current_mems_allowed(void) |
1881 | { | 1933 | { |
1882 | current->mems_allowed = NODE_MASK_ALL; | 1934 | nodes_setall(current->mems_allowed); |
1883 | } | 1935 | } |
1884 | 1936 | ||
1885 | /** | 1937 | /** |
@@ -2261,8 +2313,16 @@ void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) | |||
2261 | m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count, | 2313 | m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count, |
2262 | task->cpus_allowed); | 2314 | task->cpus_allowed); |
2263 | seq_printf(m, "\n"); | 2315 | seq_printf(m, "\n"); |
2316 | seq_printf(m, "Cpus_allowed_list:\t"); | ||
2317 | m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count, | ||
2318 | task->cpus_allowed); | ||
2319 | seq_printf(m, "\n"); | ||
2264 | seq_printf(m, "Mems_allowed:\t"); | 2320 | seq_printf(m, "Mems_allowed:\t"); |
2265 | m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count, | 2321 | m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count, |
2266 | task->mems_allowed); | 2322 | task->mems_allowed); |
2267 | seq_printf(m, "\n"); | 2323 | seq_printf(m, "\n"); |
2324 | seq_printf(m, "Mems_allowed_list:\t"); | ||
2325 | m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count, | ||
2326 | task->mems_allowed); | ||
2327 | seq_printf(m, "\n"); | ||
2268 | } | 2328 | } |