aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/cpuset.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-21 18:40:24 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-21 18:40:24 -0400
commit: ec965350bb98bd291eb34f6ecddfdcfc36da1e6e (patch)
tree: 983bcaf33ed00b48a86f7f8790cc460cf15dd252 /kernel/cpuset.c
parent: 5f033bb9bc5cb3bb37a79e3ef131f50ecdcb72b0 (diff)
parent: 486fdae21458bd9f4e125099bb3c38a4064e450e (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-devel
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched-devel: (62 commits)
  sched: build fix
  sched: better rt-group documentation
  sched: features fix
  sched: /debug/sched_features
  sched: add SCHED_FEAT_DEADLINE
  sched: debug: show a weight tree
  sched: fair: weight calculations
  sched: fair-group: de-couple load-balancing from the rb-trees
  sched: fair-group scheduling vs latency
  sched: rt-group: optimize dequeue_rt_stack
  sched: debug: add some debug code to handle the full hierarchy
  sched: fair-group: SMP-nice for group scheduling
  sched, cpuset: customize sched domains, core
  sched, cpuset: customize sched domains, docs
  sched: prepatory code movement
  sched: rt: multi level group constraints
  sched: task_group hierarchy
  sched: fix the task_group hierarchy for UID grouping
  sched: allow the group scheduler to have multiple levels
  sched: mix tasks and groups
  ...
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- kernel/cpuset.c 100
1 files changed, 80 insertions, 20 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index a1b61f414228..8b35fbd8292f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -98,6 +98,9 @@ struct cpuset {
98 /* partition number for rebuild_sched_domains() */ 98 /* partition number for rebuild_sched_domains() */
99 int pn; 99 int pn;
100 100
101 /* for custom sched domain */
102 int relax_domain_level;
103
101 /* used for walking a cpuset heirarchy */ 104 /* used for walking a cpuset heirarchy */
102 struct list_head stack_list; 105 struct list_head stack_list;
103}; 106};
@@ -478,6 +481,16 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
478 return cpus_intersects(a->cpus_allowed, b->cpus_allowed); 481 return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
479} 482}
480 483
484static void
485update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
486{
487 if (!dattr)
488 return;
489 if (dattr->relax_domain_level < c->relax_domain_level)
490 dattr->relax_domain_level = c->relax_domain_level;
491 return;
492}
493
481/* 494/*
482 * rebuild_sched_domains() 495 * rebuild_sched_domains()
483 * 496 *
@@ -553,12 +566,14 @@ static void rebuild_sched_domains(void)
553 int csn; /* how many cpuset ptrs in csa so far */ 566 int csn; /* how many cpuset ptrs in csa so far */
554 int i, j, k; /* indices for partition finding loops */ 567 int i, j, k; /* indices for partition finding loops */
555 cpumask_t *doms; /* resulting partition; i.e. sched domains */ 568 cpumask_t *doms; /* resulting partition; i.e. sched domains */
569 struct sched_domain_attr *dattr; /* attributes for custom domains */
556 int ndoms; /* number of sched domains in result */ 570 int ndoms; /* number of sched domains in result */
557 int nslot; /* next empty doms[] cpumask_t slot */ 571 int nslot; /* next empty doms[] cpumask_t slot */
558 572
559 q = NULL; 573 q = NULL;
560 csa = NULL; 574 csa = NULL;
561 doms = NULL; 575 doms = NULL;
576 dattr = NULL;
562 577
563 /* Special case for the 99% of systems with one, full, sched domain */ 578 /* Special case for the 99% of systems with one, full, sched domain */
564 if (is_sched_load_balance(&top_cpuset)) { 579 if (is_sched_load_balance(&top_cpuset)) {
@@ -566,6 +581,11 @@ static void rebuild_sched_domains(void)
566 doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); 581 doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
567 if (!doms) 582 if (!doms)
568 goto rebuild; 583 goto rebuild;
584 dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
585 if (dattr) {
586 *dattr = SD_ATTR_INIT;
587 update_domain_attr(dattr, &top_cpuset);
588 }
569 *doms = top_cpuset.cpus_allowed; 589 *doms = top_cpuset.cpus_allowed;
570 goto rebuild; 590 goto rebuild;
571 } 591 }
@@ -622,6 +642,7 @@ restart:
622 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); 642 doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
623 if (!doms) 643 if (!doms)
624 goto rebuild; 644 goto rebuild;
645 dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);
625 646
626 for (nslot = 0, i = 0; i < csn; i++) { 647 for (nslot = 0, i = 0; i < csn; i++) {
627 struct cpuset *a = csa[i]; 648 struct cpuset *a = csa[i];
@@ -644,12 +665,15 @@ restart:
644 } 665 }
645 666
646 cpus_clear(*dp); 667 cpus_clear(*dp);
668 if (dattr)
669 *(dattr + nslot) = SD_ATTR_INIT;
647 for (j = i; j < csn; j++) { 670 for (j = i; j < csn; j++) {
648 struct cpuset *b = csa[j]; 671 struct cpuset *b = csa[j];
649 672
650 if (apn == b->pn) { 673 if (apn == b->pn) {
651 cpus_or(*dp, *dp, b->cpus_allowed); 674 cpus_or(*dp, *dp, b->cpus_allowed);
652 b->pn = -1; 675 b->pn = -1;
676 update_domain_attr(dattr, b);
653 } 677 }
654 } 678 }
655 nslot++; 679 nslot++;
@@ -660,7 +684,7 @@ restart:
660rebuild: 684rebuild:
661 /* Have scheduler rebuild sched domains */ 685 /* Have scheduler rebuild sched domains */
662 get_online_cpus(); 686 get_online_cpus();
663 partition_sched_domains(ndoms, doms); 687 partition_sched_domains(ndoms, doms, dattr);
664 put_online_cpus(); 688 put_online_cpus();
665 689
666done: 690done:
@@ -668,6 +692,7 @@ done:
668 kfifo_free(q); 692 kfifo_free(q);
669 kfree(csa); 693 kfree(csa);
670 /* Don't kfree(doms) -- partition_sched_domains() does that. */ 694 /* Don't kfree(doms) -- partition_sched_domains() does that. */
695 /* Don't kfree(dattr) -- partition_sched_domains() does that. */
671} 696}
672 697
673static inline int started_after_time(struct task_struct *t1, 698static inline int started_after_time(struct task_struct *t1,
@@ -729,7 +754,7 @@ int cpuset_test_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
729 */ 754 */
730void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan) 755void cpuset_change_cpumask(struct task_struct *tsk, struct cgroup_scanner *scan)
731{ 756{
732 set_cpus_allowed(tsk, (cgroup_cs(scan->cg))->cpus_allowed); 757 set_cpus_allowed_ptr(tsk, &((cgroup_cs(scan->cg))->cpus_allowed));
733} 758}
734 759
735/** 760/**
@@ -1011,6 +1036,21 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
1011 return 0; 1036 return 0;
1012} 1037}
1013 1038
1039static int update_relax_domain_level(struct cpuset *cs, char *buf)
1040{
1041 int val = simple_strtol(buf, NULL, 10);
1042
1043 if (val < 0)
1044 val = -1;
1045
1046 if (val != cs->relax_domain_level) {
1047 cs->relax_domain_level = val;
1048 rebuild_sched_domains();
1049 }
1050
1051 return 0;
1052}
1053
1014/* 1054/*
1015 * update_flag - read a 0 or a 1 in a file and update associated flag 1055 * update_flag - read a 0 or a 1 in a file and update associated flag
1016 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, 1056 * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
@@ -1178,7 +1218,7 @@ static void cpuset_attach(struct cgroup_subsys *ss,
1178 1218
1179 mutex_lock(&callback_mutex); 1219 mutex_lock(&callback_mutex);
1180 guarantee_online_cpus(cs, &cpus); 1220 guarantee_online_cpus(cs, &cpus);
1181 set_cpus_allowed(tsk, cpus); 1221 set_cpus_allowed_ptr(tsk, &cpus);
1182 mutex_unlock(&callback_mutex); 1222 mutex_unlock(&callback_mutex);
1183 1223
1184 from = oldcs->mems_allowed; 1224 from = oldcs->mems_allowed;
@@ -1202,6 +1242,7 @@ typedef enum {
1202 FILE_CPU_EXCLUSIVE, 1242 FILE_CPU_EXCLUSIVE,
1203 FILE_MEM_EXCLUSIVE, 1243 FILE_MEM_EXCLUSIVE,
1204 FILE_SCHED_LOAD_BALANCE, 1244 FILE_SCHED_LOAD_BALANCE,
1245 FILE_SCHED_RELAX_DOMAIN_LEVEL,
1205 FILE_MEMORY_PRESSURE_ENABLED, 1246 FILE_MEMORY_PRESSURE_ENABLED,
1206 FILE_MEMORY_PRESSURE, 1247 FILE_MEMORY_PRESSURE,
1207 FILE_SPREAD_PAGE, 1248 FILE_SPREAD_PAGE,
@@ -1256,6 +1297,9 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
1256 case FILE_SCHED_LOAD_BALANCE: 1297 case FILE_SCHED_LOAD_BALANCE:
1257 retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer); 1298 retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer);
1258 break; 1299 break;
1300 case FILE_SCHED_RELAX_DOMAIN_LEVEL:
1301 retval = update_relax_domain_level(cs, buffer);
1302 break;
1259 case FILE_MEMORY_MIGRATE: 1303 case FILE_MEMORY_MIGRATE:
1260 retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); 1304 retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
1261 break; 1305 break;
@@ -1354,6 +1398,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont,
1354 case FILE_SCHED_LOAD_BALANCE: 1398 case FILE_SCHED_LOAD_BALANCE:
1355 *s++ = is_sched_load_balance(cs) ? '1' : '0'; 1399 *s++ = is_sched_load_balance(cs) ? '1' : '0';
1356 break; 1400 break;
1401 case FILE_SCHED_RELAX_DOMAIN_LEVEL:
1402 s += sprintf(s, "%d", cs->relax_domain_level);
1403 break;
1357 case FILE_MEMORY_MIGRATE: 1404 case FILE_MEMORY_MIGRATE:
1358 *s++ = is_memory_migrate(cs) ? '1' : '0'; 1405 *s++ = is_memory_migrate(cs) ? '1' : '0';
1359 break; 1406 break;
@@ -1424,6 +1471,13 @@ static struct cftype cft_sched_load_balance = {
1424 .private = FILE_SCHED_LOAD_BALANCE, 1471 .private = FILE_SCHED_LOAD_BALANCE,
1425}; 1472};
1426 1473
1474static struct cftype cft_sched_relax_domain_level = {
1475 .name = "sched_relax_domain_level",
1476 .read = cpuset_common_file_read,
1477 .write = cpuset_common_file_write,
1478 .private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
1479};
1480
1427static struct cftype cft_memory_migrate = { 1481static struct cftype cft_memory_migrate = {
1428 .name = "memory_migrate", 1482 .name = "memory_migrate",
1429 .read = cpuset_common_file_read, 1483 .read = cpuset_common_file_read,
@@ -1475,6 +1529,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
1475 return err; 1529 return err;
1476 if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0) 1530 if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
1477 return err; 1531 return err;
1532 if ((err = cgroup_add_file(cont, ss,
1533 &cft_sched_relax_domain_level)) < 0)
1534 return err;
1478 if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0) 1535 if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
1479 return err; 1536 return err;
1480 if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0) 1537 if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
@@ -1555,10 +1612,11 @@ static struct cgroup_subsys_state *cpuset_create(
1555 if (is_spread_slab(parent)) 1612 if (is_spread_slab(parent))
1556 set_bit(CS_SPREAD_SLAB, &cs->flags); 1613 set_bit(CS_SPREAD_SLAB, &cs->flags);
1557 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); 1614 set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
1558 cs->cpus_allowed = CPU_MASK_NONE; 1615 cpus_clear(cs->cpus_allowed);
1559 cs->mems_allowed = NODE_MASK_NONE; 1616 nodes_clear(cs->mems_allowed);
1560 cs->mems_generation = cpuset_mems_generation++; 1617 cs->mems_generation = cpuset_mems_generation++;
1561 fmeter_init(&cs->fmeter); 1618 fmeter_init(&cs->fmeter);
1619 cs->relax_domain_level = -1;
1562 1620
1563 cs->parent = parent; 1621 cs->parent = parent;
1564 number_of_cpusets++; 1622 number_of_cpusets++;
@@ -1625,12 +1683,13 @@ int __init cpuset_init(void)
1625{ 1683{
1626 int err = 0; 1684 int err = 0;
1627 1685
1628 top_cpuset.cpus_allowed = CPU_MASK_ALL; 1686 cpus_setall(top_cpuset.cpus_allowed);
1629 top_cpuset.mems_allowed = NODE_MASK_ALL; 1687 nodes_setall(top_cpuset.mems_allowed);
1630 1688
1631 fmeter_init(&top_cpuset.fmeter); 1689 fmeter_init(&top_cpuset.fmeter);
1632 top_cpuset.mems_generation = cpuset_mems_generation++; 1690 top_cpuset.mems_generation = cpuset_mems_generation++;
1633 set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); 1691 set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
1692 top_cpuset.relax_domain_level = -1;
1634 1693
1635 err = register_filesystem(&cpuset_fs_type); 1694 err = register_filesystem(&cpuset_fs_type);
1636 if (err < 0) 1695 if (err < 0)
@@ -1844,6 +1903,7 @@ void __init cpuset_init_smp(void)
1844 1903
1845 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset. 1904 * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
1846 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed. 1905 * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
1906 * @pmask: pointer to cpumask_t variable to receive cpus_allowed set.
1847 * 1907 *
1848 * Description: Returns the cpumask_t cpus_allowed of the cpuset 1908 * Description: Returns the cpumask_t cpus_allowed of the cpuset
1849 * attached to the specified @tsk. Guaranteed to return some non-empty 1909 * attached to the specified @tsk. Guaranteed to return some non-empty
@@ -1851,35 +1911,27 @@ void __init cpuset_init_smp(void)
1851 * tasks cpuset. 1911 * tasks cpuset.
1852 **/ 1912 **/
1853 1913
1854cpumask_t cpuset_cpus_allowed(struct task_struct *tsk) 1914void cpuset_cpus_allowed(struct task_struct *tsk, cpumask_t *pmask)
1855{ 1915{
1856 cpumask_t mask;
1857
1858 mutex_lock(&callback_mutex); 1916 mutex_lock(&callback_mutex);
1859 mask = cpuset_cpus_allowed_locked(tsk); 1917 cpuset_cpus_allowed_locked(tsk, pmask);
1860 mutex_unlock(&callback_mutex); 1918 mutex_unlock(&callback_mutex);
1861
1862 return mask;
1863} 1919}
1864 1920
1865/** 1921/**
1866 * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset. 1922 * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
1867 * Must be called with callback_mutex held. 1923 * Must be called with callback_mutex held.
1868 **/ 1924 **/
1869cpumask_t cpuset_cpus_allowed_locked(struct task_struct *tsk) 1925void cpuset_cpus_allowed_locked(struct task_struct *tsk, cpumask_t *pmask)
1870{ 1926{
1871 cpumask_t mask;
1872
1873 task_lock(tsk); 1927 task_lock(tsk);
1874 guarantee_online_cpus(task_cs(tsk), &mask); 1928 guarantee_online_cpus(task_cs(tsk), pmask);
1875 task_unlock(tsk); 1929 task_unlock(tsk);
1876
1877 return mask;
1878} 1930}
1879 1931
1880void cpuset_init_current_mems_allowed(void) 1932void cpuset_init_current_mems_allowed(void)
1881{ 1933{
1882 current->mems_allowed = NODE_MASK_ALL; 1934 nodes_setall(current->mems_allowed);
1883} 1935}
1884 1936
1885/** 1937/**
@@ -2261,8 +2313,16 @@ void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
2261 m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count, 2313 m->count += cpumask_scnprintf(m->buf + m->count, m->size - m->count,
2262 task->cpus_allowed); 2314 task->cpus_allowed);
2263 seq_printf(m, "\n"); 2315 seq_printf(m, "\n");
2316 seq_printf(m, "Cpus_allowed_list:\t");
2317 m->count += cpulist_scnprintf(m->buf + m->count, m->size - m->count,
2318 task->cpus_allowed);
2319 seq_printf(m, "\n");
2264 seq_printf(m, "Mems_allowed:\t"); 2320 seq_printf(m, "Mems_allowed:\t");
2265 m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count, 2321 m->count += nodemask_scnprintf(m->buf + m->count, m->size - m->count,
2266 task->mems_allowed); 2322 task->mems_allowed);
2267 seq_printf(m, "\n"); 2323 seq_printf(m, "\n");
2324 seq_printf(m, "Mems_allowed_list:\t");
2325 m->count += nodelist_scnprintf(m->buf + m->count, m->size - m->count,
2326 task->mems_allowed);
2327 seq_printf(m, "\n");
2268} 2328}