diff options
author | Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> | 2008-04-15 01:04:23 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-19 13:45:00 -0400 |
commit | 1d3504fcf5606579d60b649d19f44b3871c1ddae (patch) | |
tree | 001a1b57bd7f123fd51361d78d9277b2dcac1bf3 /kernel/cpuset.c | |
parent | 4d5f35533fb9b2cd553cec6611195bcbfb7ffd84 (diff) |
sched, cpuset: customize sched domains, core
[rebased for sched-devel/latest]
- Add a new cpuset file, having levels:
sched_relax_domain_level
- Modify partition_sched_domains() and build_sched_domains()
to take attributes parameter passed from cpuset.
- Fill newidle_idx for node domains which currently unused but
might be required if sched_relax_domain_level become higher.
- We can change the default level by boot option 'relax_domain_level='.
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/cpuset.c')
-rw-r--r-- | kernel/cpuset.c | 61 |
1 files changed, 60 insertions, 1 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b0c870b2ac30..8b35fbd8292f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -98,6 +98,9 @@ struct cpuset { | |||
98 | /* partition number for rebuild_sched_domains() */ | 98 | /* partition number for rebuild_sched_domains() */ |
99 | int pn; | 99 | int pn; |
100 | 100 | ||
101 | /* for custom sched domain */ | ||
102 | int relax_domain_level; | ||
103 | |||
101 | /* used for walking a cpuset heirarchy */ | 104 | /* used for walking a cpuset heirarchy */ |
102 | struct list_head stack_list; | 105 | struct list_head stack_list; |
103 | }; | 106 | }; |
@@ -478,6 +481,16 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) | |||
478 | return cpus_intersects(a->cpus_allowed, b->cpus_allowed); | 481 | return cpus_intersects(a->cpus_allowed, b->cpus_allowed); |
479 | } | 482 | } |
480 | 483 | ||
484 | static void | ||
485 | update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) | ||
486 | { | ||
487 | if (!dattr) | ||
488 | return; | ||
489 | if (dattr->relax_domain_level < c->relax_domain_level) | ||
490 | dattr->relax_domain_level = c->relax_domain_level; | ||
491 | return; | ||
492 | } | ||
493 | |||
481 | /* | 494 | /* |
482 | * rebuild_sched_domains() | 495 | * rebuild_sched_domains() |
483 | * | 496 | * |
@@ -553,12 +566,14 @@ static void rebuild_sched_domains(void) | |||
553 | int csn; /* how many cpuset ptrs in csa so far */ | 566 | int csn; /* how many cpuset ptrs in csa so far */ |
554 | int i, j, k; /* indices for partition finding loops */ | 567 | int i, j, k; /* indices for partition finding loops */ |
555 | cpumask_t *doms; /* resulting partition; i.e. sched domains */ | 568 | cpumask_t *doms; /* resulting partition; i.e. sched domains */ |
569 | struct sched_domain_attr *dattr; /* attributes for custom domains */ | ||
556 | int ndoms; /* number of sched domains in result */ | 570 | int ndoms; /* number of sched domains in result */ |
557 | int nslot; /* next empty doms[] cpumask_t slot */ | 571 | int nslot; /* next empty doms[] cpumask_t slot */ |
558 | 572 | ||
559 | q = NULL; | 573 | q = NULL; |
560 | csa = NULL; | 574 | csa = NULL; |
561 | doms = NULL; | 575 | doms = NULL; |
576 | dattr = NULL; | ||
562 | 577 | ||
563 | /* Special case for the 99% of systems with one, full, sched domain */ | 578 | /* Special case for the 99% of systems with one, full, sched domain */ |
564 | if (is_sched_load_balance(&top_cpuset)) { | 579 | if (is_sched_load_balance(&top_cpuset)) { |
@@ -566,6 +581,11 @@ static void rebuild_sched_domains(void) | |||
566 | doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); | 581 | doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); |
567 | if (!doms) | 582 | if (!doms) |
568 | goto rebuild; | 583 | goto rebuild; |
584 | dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); | ||
585 | if (dattr) { | ||
586 | *dattr = SD_ATTR_INIT; | ||
587 | update_domain_attr(dattr, &top_cpuset); | ||
588 | } | ||
569 | *doms = top_cpuset.cpus_allowed; | 589 | *doms = top_cpuset.cpus_allowed; |
570 | goto rebuild; | 590 | goto rebuild; |
571 | } | 591 | } |
@@ -622,6 +642,7 @@ restart: | |||
622 | doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); | 642 | doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); |
623 | if (!doms) | 643 | if (!doms) |
624 | goto rebuild; | 644 | goto rebuild; |
645 | dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL); | ||
625 | 646 | ||
626 | for (nslot = 0, i = 0; i < csn; i++) { | 647 | for (nslot = 0, i = 0; i < csn; i++) { |
627 | struct cpuset *a = csa[i]; | 648 | struct cpuset *a = csa[i]; |
@@ -644,12 +665,15 @@ restart: | |||
644 | } | 665 | } |
645 | 666 | ||
646 | cpus_clear(*dp); | 667 | cpus_clear(*dp); |
668 | if (dattr) | ||
669 | *(dattr + nslot) = SD_ATTR_INIT; | ||
647 | for (j = i; j < csn; j++) { | 670 | for (j = i; j < csn; j++) { |
648 | struct cpuset *b = csa[j]; | 671 | struct cpuset *b = csa[j]; |
649 | 672 | ||
650 | if (apn == b->pn) { | 673 | if (apn == b->pn) { |
651 | cpus_or(*dp, *dp, b->cpus_allowed); | 674 | cpus_or(*dp, *dp, b->cpus_allowed); |
652 | b->pn = -1; | 675 | b->pn = -1; |
676 | update_domain_attr(dattr, b); | ||
653 | } | 677 | } |
654 | } | 678 | } |
655 | nslot++; | 679 | nslot++; |
@@ -660,7 +684,7 @@ restart: | |||
660 | rebuild: | 684 | rebuild: |
661 | /* Have scheduler rebuild sched domains */ | 685 | /* Have scheduler rebuild sched domains */ |
662 | get_online_cpus(); | 686 | get_online_cpus(); |
663 | partition_sched_domains(ndoms, doms); | 687 | partition_sched_domains(ndoms, doms, dattr); |
664 | put_online_cpus(); | 688 | put_online_cpus(); |
665 | 689 | ||
666 | done: | 690 | done: |
@@ -668,6 +692,7 @@ done: | |||
668 | kfifo_free(q); | 692 | kfifo_free(q); |
669 | kfree(csa); | 693 | kfree(csa); |
670 | /* Don't kfree(doms) -- partition_sched_domains() does that. */ | 694 | /* Don't kfree(doms) -- partition_sched_domains() does that. */ |
695 | /* Don't kfree(dattr) -- partition_sched_domains() does that. */ | ||
671 | } | 696 | } |
672 | 697 | ||
673 | static inline int started_after_time(struct task_struct *t1, | 698 | static inline int started_after_time(struct task_struct *t1, |
@@ -1011,6 +1036,21 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) | |||
1011 | return 0; | 1036 | return 0; |
1012 | } | 1037 | } |
1013 | 1038 | ||
1039 | static int update_relax_domain_level(struct cpuset *cs, char *buf) | ||
1040 | { | ||
1041 | int val = simple_strtol(buf, NULL, 10); | ||
1042 | |||
1043 | if (val < 0) | ||
1044 | val = -1; | ||
1045 | |||
1046 | if (val != cs->relax_domain_level) { | ||
1047 | cs->relax_domain_level = val; | ||
1048 | rebuild_sched_domains(); | ||
1049 | } | ||
1050 | |||
1051 | return 0; | ||
1052 | } | ||
1053 | |||
1014 | /* | 1054 | /* |
1015 | * update_flag - read a 0 or a 1 in a file and update associated flag | 1055 | * update_flag - read a 0 or a 1 in a file and update associated flag |
1016 | * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, | 1056 | * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, |
@@ -1202,6 +1242,7 @@ typedef enum { | |||
1202 | FILE_CPU_EXCLUSIVE, | 1242 | FILE_CPU_EXCLUSIVE, |
1203 | FILE_MEM_EXCLUSIVE, | 1243 | FILE_MEM_EXCLUSIVE, |
1204 | FILE_SCHED_LOAD_BALANCE, | 1244 | FILE_SCHED_LOAD_BALANCE, |
1245 | FILE_SCHED_RELAX_DOMAIN_LEVEL, | ||
1205 | FILE_MEMORY_PRESSURE_ENABLED, | 1246 | FILE_MEMORY_PRESSURE_ENABLED, |
1206 | FILE_MEMORY_PRESSURE, | 1247 | FILE_MEMORY_PRESSURE, |
1207 | FILE_SPREAD_PAGE, | 1248 | FILE_SPREAD_PAGE, |
@@ -1256,6 +1297,9 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont, | |||
1256 | case FILE_SCHED_LOAD_BALANCE: | 1297 | case FILE_SCHED_LOAD_BALANCE: |
1257 | retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer); | 1298 | retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer); |
1258 | break; | 1299 | break; |
1300 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | ||
1301 | retval = update_relax_domain_level(cs, buffer); | ||
1302 | break; | ||
1259 | case FILE_MEMORY_MIGRATE: | 1303 | case FILE_MEMORY_MIGRATE: |
1260 | retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); | 1304 | retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); |
1261 | break; | 1305 | break; |
@@ -1354,6 +1398,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont, | |||
1354 | case FILE_SCHED_LOAD_BALANCE: | 1398 | case FILE_SCHED_LOAD_BALANCE: |
1355 | *s++ = is_sched_load_balance(cs) ? '1' : '0'; | 1399 | *s++ = is_sched_load_balance(cs) ? '1' : '0'; |
1356 | break; | 1400 | break; |
1401 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | ||
1402 | s += sprintf(s, "%d", cs->relax_domain_level); | ||
1403 | break; | ||
1357 | case FILE_MEMORY_MIGRATE: | 1404 | case FILE_MEMORY_MIGRATE: |
1358 | *s++ = is_memory_migrate(cs) ? '1' : '0'; | 1405 | *s++ = is_memory_migrate(cs) ? '1' : '0'; |
1359 | break; | 1406 | break; |
@@ -1424,6 +1471,13 @@ static struct cftype cft_sched_load_balance = { | |||
1424 | .private = FILE_SCHED_LOAD_BALANCE, | 1471 | .private = FILE_SCHED_LOAD_BALANCE, |
1425 | }; | 1472 | }; |
1426 | 1473 | ||
1474 | static struct cftype cft_sched_relax_domain_level = { | ||
1475 | .name = "sched_relax_domain_level", | ||
1476 | .read = cpuset_common_file_read, | ||
1477 | .write = cpuset_common_file_write, | ||
1478 | .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, | ||
1479 | }; | ||
1480 | |||
1427 | static struct cftype cft_memory_migrate = { | 1481 | static struct cftype cft_memory_migrate = { |
1428 | .name = "memory_migrate", | 1482 | .name = "memory_migrate", |
1429 | .read = cpuset_common_file_read, | 1483 | .read = cpuset_common_file_read, |
@@ -1475,6 +1529,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) | |||
1475 | return err; | 1529 | return err; |
1476 | if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0) | 1530 | if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0) |
1477 | return err; | 1531 | return err; |
1532 | if ((err = cgroup_add_file(cont, ss, | ||
1533 | &cft_sched_relax_domain_level)) < 0) | ||
1534 | return err; | ||
1478 | if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0) | 1535 | if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0) |
1479 | return err; | 1536 | return err; |
1480 | if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0) | 1537 | if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0) |
@@ -1559,6 +1616,7 @@ static struct cgroup_subsys_state *cpuset_create( | |||
1559 | nodes_clear(cs->mems_allowed); | 1616 | nodes_clear(cs->mems_allowed); |
1560 | cs->mems_generation = cpuset_mems_generation++; | 1617 | cs->mems_generation = cpuset_mems_generation++; |
1561 | fmeter_init(&cs->fmeter); | 1618 | fmeter_init(&cs->fmeter); |
1619 | cs->relax_domain_level = -1; | ||
1562 | 1620 | ||
1563 | cs->parent = parent; | 1621 | cs->parent = parent; |
1564 | number_of_cpusets++; | 1622 | number_of_cpusets++; |
@@ -1631,6 +1689,7 @@ int __init cpuset_init(void) | |||
1631 | fmeter_init(&top_cpuset.fmeter); | 1689 | fmeter_init(&top_cpuset.fmeter); |
1632 | top_cpuset.mems_generation = cpuset_mems_generation++; | 1690 | top_cpuset.mems_generation = cpuset_mems_generation++; |
1633 | set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); | 1691 | set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); |
1692 | top_cpuset.relax_domain_level = -1; | ||
1634 | 1693 | ||
1635 | err = register_filesystem(&cpuset_fs_type); | 1694 | err = register_filesystem(&cpuset_fs_type); |
1636 | if (err < 0) | 1695 | if (err < 0) |