Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r--   kernel/cpuset.c   125
1 file changed, 89 insertions(+), 36 deletions(-)
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a1b61f414228..48a976c52cf5 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -98,6 +98,9 @@ struct cpuset {
 	/* partition number for rebuild_sched_domains() */
 	int pn;
 
+	/* for custom sched domain */
+	int relax_domain_level;
+
 	/* used for walking a cpuset heirarchy */
 	struct list_head stack_list;
 };
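
Note: the new field asks the scheduler how widely it may search for an idle CPU when balancing a waking task. A sketch of the value convention this series uses (per its accompanying Documentation/cpusets.txt update; the top levels only apply on NUMA systems):

/*
 * relax_domain_level requests (sketch of the series' convention):
 *  -1 : no request -- use the system default, or follow other requests
 *   0 : no search
 *   1 : search siblings (hyperthreads in a core)
 *   2 : search cores in a package
 *   3 : search cpus in a node [= system wide on non-NUMA systems]
 *   4 : search nodes in a chunk of node [on NUMA systems]
 *   5 : search system wide [on NUMA systems]
 */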
@@ -478,6 +481,16 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
 	return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
 }
 
+static void
+update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
+{
+	if (!dattr)
+		return;
+	if (dattr->relax_domain_level < c->relax_domain_level)
+		dattr->relax_domain_level = c->relax_domain_level;
+	return;
+}
+
 /*
  * rebuild_sched_domains()
  *
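
Note: update_domain_attr() folds one cpuset's request into an attribute block by keeping the maximum level seen so far, and a NULL attr is tolerated so attribute allocation can stay best-effort. A minimal sketch, assuming SD_ATTR_INIT (added elsewhere in this series) sets relax_domain_level to -1, with cs_a and cs_b as hypothetical cpusets:

	struct sched_domain_attr attr = SD_ATTR_INIT;	/* level starts at -1 */
	update_domain_attr(&attr, cs_a);	/* cs_a requests 2 -> attr holds 2 */
	update_domain_attr(&attr, cs_b);	/* cs_b requests 4 -> attr holds 4 */
	update_domain_attr(NULL, cs_b);		/* NULL attr is a harmless no-op */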
@@ -553,12 +566,14 @@ static void rebuild_sched_domains(void)
 	int csn;		/* how many cpuset ptrs in csa so far */
 	int i, j, k;		/* indices for partition finding loops */
 	cpumask_t *doms;	/* resulting partition; i.e. sched domains */
+	struct sched_domain_attr *dattr;  /* attributes for custom domains */
 	int ndoms;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] cpumask_t slot */
 
 	q = NULL;
 	csa = NULL;
 	doms = NULL;
+	dattr = NULL;
 
 	/* Special case for the 99% of systems with one, full, sched domain */
 	if (is_sched_load_balance(&top_cpuset)) {
@@ -566,6 +581,11 @@ static void rebuild_sched_domains(void)
 		doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
 		if (!doms)
 			goto rebuild;
+		dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
+		if (dattr) {
+			*dattr = SD_ATTR_INIT;
+			update_domain_attr(dattr, &top_cpuset);
+		}
 		*doms = top_cpuset.cpus_allowed;
 		goto rebuild;
 	}
@@ -622,6 +642,7 @@ restart:
 	doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
 	if (!doms)
 		goto rebuild;
+	dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);
 
 	for (nslot = 0, i = 0; i < csn; i++) {
 		struct cpuset *a = csa[i];
@@ -644,12 +665,15 @@ restart:
 		}
 
 		cpus_clear(*dp);
+		if (dattr)
+			*(dattr + nslot) = SD_ATTR_INIT;
 		for (j = i; j < csn; j++) {
 			struct cpuset *b = csa[j];
 
 			if (apn == b->pn) {
 				cpus_or(*dp, *dp, b->cpus_allowed);
 				b->pn = -1;
+				update_domain_attr(dattr, b);
 			}
 		}
 		nslot++;
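
Note: the result arrays are built in parallel, one entry per scheduler domain; each attribute slot is seeded before the member cpusets' requests are folded in via update_domain_attr():

/* Sketch of the result layout (one entry per sched domain):
 *   doms[nslot]  -- cpumask of the CPUs forming this domain
 *   dattr[nslot] -- attributes for this domain, seeded with SD_ATTR_INIT
 */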
@@ -660,7 +684,7 @@ restart:
 rebuild:
 	/* Have scheduler rebuild sched domains */
 	get_online_cpus();
-	partition_sched_domains(ndoms, doms);
+	partition_sched_domains(ndoms, doms, dattr);
 	put_online_cpus();
 
 done:
@@ -668,6 +692,7 @@ done:
 		kfifo_free(q);
 	kfree(csa);
 	/* Don't kfree(doms) -- partition_sched_domains() does that. */
+	/* Don't kfree(dattr) -- partition_sched_domains() does that. */
 }
 
 static inline int started_after_time(struct task_struct *t1,
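
Note: neither array is freed here because partition_sched_domains() takes ownership of both. For context, the matching declaration added elsewhere in this series (in the scheduler headers, not part of this diff) is expected to read:

	void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
				     struct sched_domain_attr *dattr_new);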
@@ -729,7 +754,7 @@ int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
  */
 void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
 {
-	set_cpus_allowed(tsk, (cgroup_cs(scan->cg))->cpus_allowed);
+	set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
 }
 
 /**
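
Note: set_cpus_allowed() takes a cpumask_t by value, which costs stack as NR_CPUS grows; the _ptr variant passes a pointer instead. The conversion is mechanical:

	set_cpus_allowed(tsk, mask);		/* old: copies the whole mask */
	set_cpus_allowed_ptr(tsk, &mask);	/* new: passes a pointer */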
@@ -916,7 +941,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	cs->mems_generation = cpuset_mems_generation++;
 	mutex_unlock(&callback_mutex);
 
-	cpuset_being_rebound = cs;	/* causes mpol_copy() rebind */
+	cpuset_being_rebound = cs;	/* causes mpol_dup() rebind */
 
 	fudge = 10;				/* spare mmarray[] slots */
 	fudge += cpus_weight(cs->cpus_allowed);	/* imagine one fork-bomb/cpu */
@@ -967,7 +992,7 @@ static int update_nodemask(struct cpuset *cs, char *buf)
 	 * rebind the vma mempolicies of each mm in mmarray[] to their
 	 * new cpuset, and release that mm.  The mpol_rebind_mm()
 	 * call takes mmap_sem, which we couldn't take while holding
-	 * tasklist_lock.  Forks can happen again now - the mpol_copy()
+	 * tasklist_lock.  Forks can happen again now - the mpol_dup()
 	 * cpuset_being_rebound check will catch such forks, and rebind
 	 * their vma mempolicies too.  Because we still hold the global
 	 * cgroup_mutex, we know that no other rebind effort will
@@ -1011,6 +1036,21 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
 	return 0;
 }
 
+static int update_relax_domain_level(struct cpuset *cs, char *buf)
+{
+	int val = simple_strtol(buf, NULL, 10);
+
+	if (val < 0)
+		val = -1;
+
+	if (val != cs->relax_domain_level) {
+		cs->relax_domain_level = val;
+		rebuild_sched_domains();
+	}
+
+	return 0;
+}
+
 /*
  * update_flag - read a 0 or a 1 in a file and update associated flag
 * bit:	the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
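
Note: writes are parsed with simple_strtol(), any negative input is clamped to -1 ("no request"), and sched domains are rebuilt only when the stored value actually changes. Illustrative behavior, assuming a cpuset currently at the default -1:

/* write "3"	   -> level becomes 3, rebuild_sched_domains() runs
 * write "-7"	   -> clamped to -1; no rebuild (value unchanged)
 * write "3" twice -> the second write changes nothing, no rebuild
 */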
@@ -1178,7 +1218,7 @@ static void cpuset_attach(struct cgroup_subsys *ss,
 
 	mutex_lock(&callback_mutex);
 	guarantee_online_cpus(cs, &cpus);
-	set_cpus_allowed(tsk, cpus);
+	set_cpus_allowed_ptr(tsk, &cpus);
 	mutex_unlock(&callback_mutex);
 
 	from = oldcs->mems_allowed;
@@ -1202,6 +1242,7 @@ typedef enum {
 	FILE_CPU_EXCLUSIVE,
 	FILE_MEM_EXCLUSIVE,
 	FILE_SCHED_LOAD_BALANCE,
+	FILE_SCHED_RELAX_DOMAIN_LEVEL,
 	FILE_MEMORY_PRESSURE_ENABLED,
 	FILE_MEMORY_PRESSURE,
 	FILE_SPREAD_PAGE,
@@ -1224,7 +1265,8 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
 		return -E2BIG;
 
 	/* +1 for nul-terminator */
-	if ((buffer = kmalloc(nbytes + 1, GFP_KERNEL)) == 0)
+	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
+	if (!buffer)
 		return -ENOMEM;
 
 	if (copy_from_user(buffer, userbuf, nbytes)) {
@@ -1256,6 +1298,9 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
 	case FILE_SCHED_LOAD_BALANCE:
 		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer);
 		break;
+	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+		retval = update_relax_domain_level(cs, buffer);
+		break;
 	case FILE_MEMORY_MIGRATE:
 		retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
 		break;
@@ -1354,6 +1399,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont,
 	case FILE_SCHED_LOAD_BALANCE:
 		*s++ = is_sched_load_balance(cs) ? '1' : '0';
 		break;
+	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+		s += sprintf(s, "%d", cs->relax_domain_level);
+		break;
 	case FILE_MEMORY_MIGRATE:
 		*s++ = is_memory_migrate(cs) ? '1' : '0';
 		break;
@@ -1424,6 +1472,13 @@ static struct cftype cft_sched_load_balance = {
 	.private = FILE_SCHED_LOAD_BALANCE,
 };
 
+static struct cftype cft_sched_relax_domain_level = {
+	.name = "sched_relax_domain_level",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
+	.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
+};
+
 static struct cftype cft_memory_migrate = {
 	.name = "memory_migrate",
 	.read = cpuset_common_file_read,
@@ -1475,6 +1530,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
 		return err;
 	if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
 		return err;
+	if ((err = cgroup_add_file(cont, ss,
+					&cft_sched_relax_domain_level)) < 0)
+		return err;
 	if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
 		return err;
 	if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
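
Note: once registered, the knob appears as a per-cpuset control file. A hypothetical user-space sketch; the /dev/cpuset mount point and the cpuset name are examples only:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path = "/dev/cpuset/mycpuset/sched_relax_domain_level";
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, "2", 1) != 1)	/* request: search cores in a package */
			perror("write");
		close(fd);
		return 0;
	}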
@@ -1555,10 +1613,11 @@ static struct cgroup_subsys_state *cpuset_create(
 	if (is_spread_slab(parent))
 		set_bit(CS_SPREAD_SLAB, &cs->flags);
 	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
-	cs->cpus_allowed = CPU_MASK_NONE;
-	cs->mems_allowed = NODE_MASK_NONE;
+	cpus_clear(cs->cpus_allowed);
+	nodes_clear(cs->mems_allowed);
 	cs->mems_generation = cpuset_mems_generation++;
 	fmeter_init(&cs->fmeter);
+	cs->relax_domain_level = -1;
 
 	cs->parent = parent;
 	number_of_cpusets++;
@@ -1625,12 +1684,13 @@ int __init cpuset_init(void)
 {
 	int err = 0;
 
-	top_cpuset.cpus_allowed = CPU_MASK_ALL;
-	top_cpuset.mems_allowed = NODE_MASK_ALL;
+	cpus_setall(top_cpuset.cpus_allowed);
+	nodes_setall(top_cpuset.mems_allowed);
 
 	fmeter_init(&top_cpuset.fmeter);
 	top_cpuset.mems_generation = cpuset_mems_generation++;
 	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
+	top_cpuset.relax_domain_level = -1;
 
 	err = register_filesystem(&cpuset_fs_type);
 	if (err < 0)
@@ -1844,6 +1904,7 @@ void __init cpuset_init_smp(void)
 
  * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
  * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
+ * @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
  *
  * Description: Returns the cpumask_t cpus_allowed of the cpuset
  * attached to the specified @tsk.  Guaranteed to return some non-empty
@@ -1851,35 +1912,27 @@ void __init cpuset_init_smp(void)
  * tasks cpuset.
  **/
 
-cpumask_t cpuset_cpus_allowed(struct task_struct *tsk)
+void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
 {
-	cpumask_t mask;
-
 	mutex_lock(&callback_mutex);
-	mask = cpuset_cpus_allowed_locked(tsk);
+	cpuset_cpus_allowed_locked(tsk, pmask);
 	mutex_unlock(&callback_mutex);
-
-	return mask;
 }
 
 /**
  * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
  * Must be called with callback_mutex held.
  **/
-cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk)
+void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask)
 {
-	cpumask_t mask;
-
 	task_lock(tsk);
-	guarantee_online_cpus(task_cs(tsk), &mask);
+	guarantee_online_cpus(task_cs(tsk), pmask);
 	task_unlock(tsk);
-
-	return mask;
 }
 
 void cpuset_init_current_mems_allowed(void)
 {
-	current->mems_allowed = NODE_MASK_ALL;
+	nodes_setall(current->mems_allowed);
 }
 
 /**
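
Note: cpuset_cpus_allowed() and its _locked variant now fill a caller-supplied mask instead of returning a cpumask_t by value, for the same stack-footprint reason as the set_cpus_allowed_ptr() conversion above. Callers convert like this (sketch):

	cpumask_t mask;

	/* old: mask = cpuset_cpus_allowed(tsk); */
	cpuset_cpus_allowed(tsk, &mask);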
@@ -1906,22 +1959,14 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
 }
 
 /**
- * cpuset_zonelist_valid_mems_allowed - check zonelist vs. current mems_allowed
- * @zl: the zonelist to be checked
+ * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
+ * @nodemask: the nodemask to be checked
  *
- * Are any of the nodes on zonelist zl allowed in current->mems_allowed?
+ * Are any of the nodes in the nodemask allowed in current->mems_allowed?
  */
-int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl)
+int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
 {
-	int i;
-
-	for (i = 0; zl->zones[i]; i++) {
-		int nid = zone_to_nid(zl->zones[i]);
-
-		if (node_isset(nid, current->mems_allowed))
-			return 1;
-	}
-	return 0;
+	return nodes_intersects(*nodemask, current->mems_allowed);
 }
 
 /*
@@ -2261,8 +2306,16 @@ void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
 	m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
 					task->cpus_allowed);
 	seq_printf(m, "\n");
+	seq_printf(m, "Cpus_allowed_list:\t");
+	m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count,
+					task->cpus_allowed);
+	seq_printf(m, "\n");
 	seq_printf(m, "Mems_allowed:\t");
 	m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
 					task->mems_allowed);
 	seq_printf(m, "\n");
+	seq_printf(m, "Mems_allowed_list:\t");
+	m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count,
+					task->mems_allowed);
+	seq_printf(m, "\n");
 }
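
Note: with the two added fields, /proc/<pid>/status reports each mask in list form as well as hex. An illustrative excerpt for a task allowed on CPUs 0-7 and memory node 0 (values made up; the hex mask width depends on the configuration):

	Cpus_allowed:	ff
	Cpus_allowed_list:	0-7
	Mems_allowed:	1
	Mems_allowed_list:	0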