diff options
author | Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> | 2008-04-15 01:04:23 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-19 13:45:00 -0400 |
commit | 1d3504fcf5606579d60b649d19f44b3871c1ddae (patch) | |
tree | 001a1b57bd7f123fd51361d78d9277b2dcac1bf3 | |
parent | 4d5f35533fb9b2cd553cec6611195bcbfb7ffd84 (diff) |
sched, cpuset: customize sched domains, core
[rebased for sched-devel/latest]
- Add a new cpuset file that holds a sched domain level:
sched_relax_domain_level
- Modify partition_sched_domains() and build_sched_domains()
to take an attributes parameter passed from cpuset.
- Fill newidle_idx for node domains, which is currently unused but
might be required if sched_relax_domain_level becomes higher.
- The default level can be changed with the boot option 'relax_domain_level='.
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/asm-ia64/topology.h | 2 | ||||
-rw-r--r-- | include/asm-sh/topology.h | 2 | ||||
-rw-r--r-- | include/asm-x86/topology.h | 2 | ||||
-rw-r--r-- | include/linux/sched.h | 23 | ||||
-rw-r--r-- | kernel/cpuset.c | 61 | ||||
-rw-r--r-- | kernel/sched.c | 78 | ||||
-rw-r--r-- | kernel/sched_fair.c | 4 |
7 files changed, 161 insertions, 11 deletions
diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h index f929dde85343..f2f72ef2a897 100644 --- a/include/asm-ia64/topology.h +++ b/include/asm-ia64/topology.h | |||
@@ -93,7 +93,7 @@ void build_cpu_to_node_map(void); | |||
93 | .cache_nice_tries = 2, \ | 93 | .cache_nice_tries = 2, \ |
94 | .busy_idx = 3, \ | 94 | .busy_idx = 3, \ |
95 | .idle_idx = 2, \ | 95 | .idle_idx = 2, \ |
96 | .newidle_idx = 0, /* unused */ \ | 96 | .newidle_idx = 2, \ |
97 | .wake_idx = 1, \ | 97 | .wake_idx = 1, \ |
98 | .forkexec_idx = 1, \ | 98 | .forkexec_idx = 1, \ |
99 | .flags = SD_LOAD_BALANCE \ | 99 | .flags = SD_LOAD_BALANCE \ |
diff --git a/include/asm-sh/topology.h b/include/asm-sh/topology.h index f402a3b1cfa4..34cdb28e8f44 100644 --- a/include/asm-sh/topology.h +++ b/include/asm-sh/topology.h | |||
@@ -16,7 +16,7 @@ | |||
16 | .cache_nice_tries = 2, \ | 16 | .cache_nice_tries = 2, \ |
17 | .busy_idx = 3, \ | 17 | .busy_idx = 3, \ |
18 | .idle_idx = 2, \ | 18 | .idle_idx = 2, \ |
19 | .newidle_idx = 0, \ | 19 | .newidle_idx = 2, \ |
20 | .wake_idx = 1, \ | 20 | .wake_idx = 1, \ |
21 | .forkexec_idx = 1, \ | 21 | .forkexec_idx = 1, \ |
22 | .flags = SD_LOAD_BALANCE \ | 22 | .flags = SD_LOAD_BALANCE \ |
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 9ef74c5d5ad6..22073268b481 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h | |||
@@ -147,7 +147,7 @@ extern unsigned long node_remap_size[]; | |||
147 | 147 | ||
148 | # define SD_CACHE_NICE_TRIES 2 | 148 | # define SD_CACHE_NICE_TRIES 2 |
149 | # define SD_IDLE_IDX 2 | 149 | # define SD_IDLE_IDX 2 |
150 | # define SD_NEWIDLE_IDX 0 | 150 | # define SD_NEWIDLE_IDX 2 |
151 | # define SD_FORKEXEC_IDX 1 | 151 | # define SD_FORKEXEC_IDX 1 |
152 | 152 | ||
153 | #endif | 153 | #endif |
diff --git a/include/linux/sched.h b/include/linux/sched.h index ada24022d230..11f47249cdd2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -704,6 +704,7 @@ enum cpu_idle_type { | |||
704 | #define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ | 704 | #define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */ |
705 | #define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */ | 705 | #define SD_SHARE_PKG_RESOURCES 512 /* Domain members share cpu pkg resources */ |
706 | #define SD_SERIALIZE 1024 /* Only a single load balancing instance */ | 706 | #define SD_SERIALIZE 1024 /* Only a single load balancing instance */ |
707 | #define SD_WAKE_IDLE_FAR 2048 /* Gain latency sacrificing cache hit */ | ||
707 | 708 | ||
708 | #define BALANCE_FOR_MC_POWER \ | 709 | #define BALANCE_FOR_MC_POWER \ |
709 | (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) | 710 | (sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0) |
@@ -733,6 +734,24 @@ struct sched_group { | |||
733 | u32 reciprocal_cpu_power; | 734 | u32 reciprocal_cpu_power; |
734 | }; | 735 | }; |
735 | 736 | ||
737 | enum sched_domain_level { | ||
738 | SD_LV_NONE = 0, | ||
739 | SD_LV_SIBLING, | ||
740 | SD_LV_MC, | ||
741 | SD_LV_CPU, | ||
742 | SD_LV_NODE, | ||
743 | SD_LV_ALLNODES, | ||
744 | SD_LV_MAX | ||
745 | }; | ||
746 | |||
747 | struct sched_domain_attr { | ||
748 | int relax_domain_level; | ||
749 | }; | ||
750 | |||
751 | #define SD_ATTR_INIT (struct sched_domain_attr) { \ | ||
752 | .relax_domain_level = -1, \ | ||
753 | } | ||
754 | |||
736 | struct sched_domain { | 755 | struct sched_domain { |
737 | /* These fields must be setup */ | 756 | /* These fields must be setup */ |
738 | struct sched_domain *parent; /* top domain must be null terminated */ | 757 | struct sched_domain *parent; /* top domain must be null terminated */ |
@@ -750,6 +769,7 @@ struct sched_domain { | |||
750 | unsigned int wake_idx; | 769 | unsigned int wake_idx; |
751 | unsigned int forkexec_idx; | 770 | unsigned int forkexec_idx; |
752 | int flags; /* See SD_* */ | 771 | int flags; /* See SD_* */ |
772 | enum sched_domain_level level; | ||
753 | 773 | ||
754 | /* Runtime fields. */ | 774 | /* Runtime fields. */ |
755 | unsigned long last_balance; /* init to jiffies. units in jiffies */ | 775 | unsigned long last_balance; /* init to jiffies. units in jiffies */ |
@@ -789,7 +809,8 @@ struct sched_domain { | |||
789 | #endif | 809 | #endif |
790 | }; | 810 | }; |
791 | 811 | ||
792 | extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new); | 812 | extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, |
813 | struct sched_domain_attr *dattr_new); | ||
793 | extern int arch_reinit_sched_domains(void); | 814 | extern int arch_reinit_sched_domains(void); |
794 | 815 | ||
795 | #endif /* CONFIG_SMP */ | 816 | #endif /* CONFIG_SMP */ |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index b0c870b2ac30..8b35fbd8292f 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -98,6 +98,9 @@ struct cpuset { | |||
98 | /* partition number for rebuild_sched_domains() */ | 98 | /* partition number for rebuild_sched_domains() */ |
99 | int pn; | 99 | int pn; |
100 | 100 | ||
101 | /* for custom sched domain */ | ||
102 | int relax_domain_level; | ||
103 | |||
101 | /* used for walking a cpuset heirarchy */ | 104 | /* used for walking a cpuset heirarchy */ |
102 | struct list_head stack_list; | 105 | struct list_head stack_list; |
103 | }; | 106 | }; |
@@ -478,6 +481,16 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b) | |||
478 | return cpus_intersects(a->cpus_allowed, b->cpus_allowed); | 481 | return cpus_intersects(a->cpus_allowed, b->cpus_allowed); |
479 | } | 482 | } |
480 | 483 | ||
484 | static void | ||
485 | update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c) | ||
486 | { | ||
487 | if (!dattr) | ||
488 | return; | ||
489 | if (dattr->relax_domain_level < c->relax_domain_level) | ||
490 | dattr->relax_domain_level = c->relax_domain_level; | ||
491 | return; | ||
492 | } | ||
493 | |||
481 | /* | 494 | /* |
482 | * rebuild_sched_domains() | 495 | * rebuild_sched_domains() |
483 | * | 496 | * |
@@ -553,12 +566,14 @@ static void rebuild_sched_domains(void) | |||
553 | int csn; /* how many cpuset ptrs in csa so far */ | 566 | int csn; /* how many cpuset ptrs in csa so far */ |
554 | int i, j, k; /* indices for partition finding loops */ | 567 | int i, j, k; /* indices for partition finding loops */ |
555 | cpumask_t *doms; /* resulting partition; i.e. sched domains */ | 568 | cpumask_t *doms; /* resulting partition; i.e. sched domains */ |
569 | struct sched_domain_attr *dattr; /* attributes for custom domains */ | ||
556 | int ndoms; /* number of sched domains in result */ | 570 | int ndoms; /* number of sched domains in result */ |
557 | int nslot; /* next empty doms[] cpumask_t slot */ | 571 | int nslot; /* next empty doms[] cpumask_t slot */ |
558 | 572 | ||
559 | q = NULL; | 573 | q = NULL; |
560 | csa = NULL; | 574 | csa = NULL; |
561 | doms = NULL; | 575 | doms = NULL; |
576 | dattr = NULL; | ||
562 | 577 | ||
563 | /* Special case for the 99% of systems with one, full, sched domain */ | 578 | /* Special case for the 99% of systems with one, full, sched domain */ |
564 | if (is_sched_load_balance(&top_cpuset)) { | 579 | if (is_sched_load_balance(&top_cpuset)) { |
@@ -566,6 +581,11 @@ static void rebuild_sched_domains(void) | |||
566 | doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); | 581 | doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL); |
567 | if (!doms) | 582 | if (!doms) |
568 | goto rebuild; | 583 | goto rebuild; |
584 | dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL); | ||
585 | if (dattr) { | ||
586 | *dattr = SD_ATTR_INIT; | ||
587 | update_domain_attr(dattr, &top_cpuset); | ||
588 | } | ||
569 | *doms = top_cpuset.cpus_allowed; | 589 | *doms = top_cpuset.cpus_allowed; |
570 | goto rebuild; | 590 | goto rebuild; |
571 | } | 591 | } |
@@ -622,6 +642,7 @@ restart: | |||
622 | doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); | 642 | doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL); |
623 | if (!doms) | 643 | if (!doms) |
624 | goto rebuild; | 644 | goto rebuild; |
645 | dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL); | ||
625 | 646 | ||
626 | for (nslot = 0, i = 0; i < csn; i++) { | 647 | for (nslot = 0, i = 0; i < csn; i++) { |
627 | struct cpuset *a = csa[i]; | 648 | struct cpuset *a = csa[i]; |
@@ -644,12 +665,15 @@ restart: | |||
644 | } | 665 | } |
645 | 666 | ||
646 | cpus_clear(*dp); | 667 | cpus_clear(*dp); |
668 | if (dattr) | ||
669 | *(dattr + nslot) = SD_ATTR_INIT; | ||
647 | for (j = i; j < csn; j++) { | 670 | for (j = i; j < csn; j++) { |
648 | struct cpuset *b = csa[j]; | 671 | struct cpuset *b = csa[j]; |
649 | 672 | ||
650 | if (apn == b->pn) { | 673 | if (apn == b->pn) { |
651 | cpus_or(*dp, *dp, b->cpus_allowed); | 674 | cpus_or(*dp, *dp, b->cpus_allowed); |
652 | b->pn = -1; | 675 | b->pn = -1; |
676 | update_domain_attr(dattr, b); | ||
653 | } | 677 | } |
654 | } | 678 | } |
655 | nslot++; | 679 | nslot++; |
@@ -660,7 +684,7 @@ restart: | |||
660 | rebuild: | 684 | rebuild: |
661 | /* Have scheduler rebuild sched domains */ | 685 | /* Have scheduler rebuild sched domains */ |
662 | get_online_cpus(); | 686 | get_online_cpus(); |
663 | partition_sched_domains(ndoms, doms); | 687 | partition_sched_domains(ndoms, doms, dattr); |
664 | put_online_cpus(); | 688 | put_online_cpus(); |
665 | 689 | ||
666 | done: | 690 | done: |
@@ -668,6 +692,7 @@ done: | |||
668 | kfifo_free(q); | 692 | kfifo_free(q); |
669 | kfree(csa); | 693 | kfree(csa); |
670 | /* Don't kfree(doms) -- partition_sched_domains() does that. */ | 694 | /* Don't kfree(doms) -- partition_sched_domains() does that. */ |
695 | /* Don't kfree(dattr) -- partition_sched_domains() does that. */ | ||
671 | } | 696 | } |
672 | 697 | ||
673 | static inline int started_after_time(struct task_struct *t1, | 698 | static inline int started_after_time(struct task_struct *t1, |
@@ -1011,6 +1036,21 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf) | |||
1011 | return 0; | 1036 | return 0; |
1012 | } | 1037 | } |
1013 | 1038 | ||
1039 | static int update_relax_domain_level(struct cpuset *cs, char *buf) | ||
1040 | { | ||
1041 | int val = simple_strtol(buf, NULL, 10); | ||
1042 | |||
1043 | if (val < 0) | ||
1044 | val = -1; | ||
1045 | |||
1046 | if (val != cs->relax_domain_level) { | ||
1047 | cs->relax_domain_level = val; | ||
1048 | rebuild_sched_domains(); | ||
1049 | } | ||
1050 | |||
1051 | return 0; | ||
1052 | } | ||
1053 | |||
1014 | /* | 1054 | /* |
1015 | * update_flag - read a 0 or a 1 in a file and update associated flag | 1055 | * update_flag - read a 0 or a 1 in a file and update associated flag |
1016 | * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, | 1056 | * bit: the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE, |
@@ -1202,6 +1242,7 @@ typedef enum { | |||
1202 | FILE_CPU_EXCLUSIVE, | 1242 | FILE_CPU_EXCLUSIVE, |
1203 | FILE_MEM_EXCLUSIVE, | 1243 | FILE_MEM_EXCLUSIVE, |
1204 | FILE_SCHED_LOAD_BALANCE, | 1244 | FILE_SCHED_LOAD_BALANCE, |
1245 | FILE_SCHED_RELAX_DOMAIN_LEVEL, | ||
1205 | FILE_MEMORY_PRESSURE_ENABLED, | 1246 | FILE_MEMORY_PRESSURE_ENABLED, |
1206 | FILE_MEMORY_PRESSURE, | 1247 | FILE_MEMORY_PRESSURE, |
1207 | FILE_SPREAD_PAGE, | 1248 | FILE_SPREAD_PAGE, |
@@ -1256,6 +1297,9 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont, | |||
1256 | case FILE_SCHED_LOAD_BALANCE: | 1297 | case FILE_SCHED_LOAD_BALANCE: |
1257 | retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer); | 1298 | retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer); |
1258 | break; | 1299 | break; |
1300 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | ||
1301 | retval = update_relax_domain_level(cs, buffer); | ||
1302 | break; | ||
1259 | case FILE_MEMORY_MIGRATE: | 1303 | case FILE_MEMORY_MIGRATE: |
1260 | retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); | 1304 | retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer); |
1261 | break; | 1305 | break; |
@@ -1354,6 +1398,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont, | |||
1354 | case FILE_SCHED_LOAD_BALANCE: | 1398 | case FILE_SCHED_LOAD_BALANCE: |
1355 | *s++ = is_sched_load_balance(cs) ? '1' : '0'; | 1399 | *s++ = is_sched_load_balance(cs) ? '1' : '0'; |
1356 | break; | 1400 | break; |
1401 | case FILE_SCHED_RELAX_DOMAIN_LEVEL: | ||
1402 | s += sprintf(s, "%d", cs->relax_domain_level); | ||
1403 | break; | ||
1357 | case FILE_MEMORY_MIGRATE: | 1404 | case FILE_MEMORY_MIGRATE: |
1358 | *s++ = is_memory_migrate(cs) ? '1' : '0'; | 1405 | *s++ = is_memory_migrate(cs) ? '1' : '0'; |
1359 | break; | 1406 | break; |
@@ -1424,6 +1471,13 @@ static struct cftype cft_sched_load_balance = { | |||
1424 | .private = FILE_SCHED_LOAD_BALANCE, | 1471 | .private = FILE_SCHED_LOAD_BALANCE, |
1425 | }; | 1472 | }; |
1426 | 1473 | ||
1474 | static struct cftype cft_sched_relax_domain_level = { | ||
1475 | .name = "sched_relax_domain_level", | ||
1476 | .read = cpuset_common_file_read, | ||
1477 | .write = cpuset_common_file_write, | ||
1478 | .private = FILE_SCHED_RELAX_DOMAIN_LEVEL, | ||
1479 | }; | ||
1480 | |||
1427 | static struct cftype cft_memory_migrate = { | 1481 | static struct cftype cft_memory_migrate = { |
1428 | .name = "memory_migrate", | 1482 | .name = "memory_migrate", |
1429 | .read = cpuset_common_file_read, | 1483 | .read = cpuset_common_file_read, |
@@ -1475,6 +1529,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) | |||
1475 | return err; | 1529 | return err; |
1476 | if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0) | 1530 | if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0) |
1477 | return err; | 1531 | return err; |
1532 | if ((err = cgroup_add_file(cont, ss, | ||
1533 | &cft_sched_relax_domain_level)) < 0) | ||
1534 | return err; | ||
1478 | if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0) | 1535 | if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0) |
1479 | return err; | 1536 | return err; |
1480 | if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0) | 1537 | if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0) |
@@ -1559,6 +1616,7 @@ static struct cgroup_subsys_state *cpuset_create( | |||
1559 | nodes_clear(cs->mems_allowed); | 1616 | nodes_clear(cs->mems_allowed); |
1560 | cs->mems_generation = cpuset_mems_generation++; | 1617 | cs->mems_generation = cpuset_mems_generation++; |
1561 | fmeter_init(&cs->fmeter); | 1618 | fmeter_init(&cs->fmeter); |
1619 | cs->relax_domain_level = -1; | ||
1562 | 1620 | ||
1563 | cs->parent = parent; | 1621 | cs->parent = parent; |
1564 | number_of_cpusets++; | 1622 | number_of_cpusets++; |
@@ -1631,6 +1689,7 @@ int __init cpuset_init(void) | |||
1631 | fmeter_init(&top_cpuset.fmeter); | 1689 | fmeter_init(&top_cpuset.fmeter); |
1632 | top_cpuset.mems_generation = cpuset_mems_generation++; | 1690 | top_cpuset.mems_generation = cpuset_mems_generation++; |
1633 | set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); | 1691 | set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); |
1692 | top_cpuset.relax_domain_level = -1; | ||
1634 | 1693 | ||
1635 | err = register_filesystem(&cpuset_fs_type); | 1694 | err = register_filesystem(&cpuset_fs_type); |
1636 | if (err < 0) | 1695 | if (err < 0) |
diff --git a/kernel/sched.c b/kernel/sched.c index 475e3fcab738..62d7481caca5 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -6771,6 +6771,7 @@ static noinline void sd_init_##type(struct sched_domain *sd) \ | |||
6771 | { \ | 6771 | { \ |
6772 | memset(sd, 0, sizeof(*sd)); \ | 6772 | memset(sd, 0, sizeof(*sd)); \ |
6773 | *sd = SD_##type##_INIT; \ | 6773 | *sd = SD_##type##_INIT; \ |
6774 | sd->level = SD_LV_##type; \ | ||
6774 | } | 6775 | } |
6775 | 6776 | ||
6776 | SD_INIT_FUNC(CPU) | 6777 | SD_INIT_FUNC(CPU) |
@@ -6819,11 +6820,42 @@ struct allmasks { | |||
6819 | #define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \ | 6820 | #define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \ |
6820 | ((unsigned long)(a) + offsetof(struct allmasks, v)) | 6821 | ((unsigned long)(a) + offsetof(struct allmasks, v)) |
6821 | 6822 | ||
6823 | static int default_relax_domain_level = -1; | ||
6824 | |||
6825 | static int __init setup_relax_domain_level(char *str) | ||
6826 | { | ||
6827 | default_relax_domain_level = simple_strtoul(str, NULL, 0); | ||
6828 | return 1; | ||
6829 | } | ||
6830 | __setup("relax_domain_level=", setup_relax_domain_level); | ||
6831 | |||
6832 | static void set_domain_attribute(struct sched_domain *sd, | ||
6833 | struct sched_domain_attr *attr) | ||
6834 | { | ||
6835 | int request; | ||
6836 | |||
6837 | if (!attr || attr->relax_domain_level < 0) { | ||
6838 | if (default_relax_domain_level < 0) | ||
6839 | return; | ||
6840 | else | ||
6841 | request = default_relax_domain_level; | ||
6842 | } else | ||
6843 | request = attr->relax_domain_level; | ||
6844 | if (request < sd->level) { | ||
6845 | /* turn off idle balance on this domain */ | ||
6846 | sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE); | ||
6847 | } else { | ||
6848 | /* turn on idle balance on this domain */ | ||
6849 | sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE); | ||
6850 | } | ||
6851 | } | ||
6852 | |||
6822 | /* | 6853 | /* |
6823 | * Build sched domains for a given set of cpus and attach the sched domains | 6854 | * Build sched domains for a given set of cpus and attach the sched domains |
6824 | * to the individual cpus | 6855 | * to the individual cpus |
6825 | */ | 6856 | */ |
6826 | static int build_sched_domains(const cpumask_t *cpu_map) | 6857 | static int __build_sched_domains(const cpumask_t *cpu_map, |
6858 | struct sched_domain_attr *attr) | ||
6827 | { | 6859 | { |
6828 | int i; | 6860 | int i; |
6829 | struct root_domain *rd; | 6861 | struct root_domain *rd; |
@@ -6887,6 +6919,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6887 | SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) { | 6919 | SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) { |
6888 | sd = &per_cpu(allnodes_domains, i); | 6920 | sd = &per_cpu(allnodes_domains, i); |
6889 | SD_INIT(sd, ALLNODES); | 6921 | SD_INIT(sd, ALLNODES); |
6922 | set_domain_attribute(sd, attr); | ||
6890 | sd->span = *cpu_map; | 6923 | sd->span = *cpu_map; |
6891 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); | 6924 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); |
6892 | p = sd; | 6925 | p = sd; |
@@ -6896,6 +6929,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6896 | 6929 | ||
6897 | sd = &per_cpu(node_domains, i); | 6930 | sd = &per_cpu(node_domains, i); |
6898 | SD_INIT(sd, NODE); | 6931 | SD_INIT(sd, NODE); |
6932 | set_domain_attribute(sd, attr); | ||
6899 | sched_domain_node_span(cpu_to_node(i), &sd->span); | 6933 | sched_domain_node_span(cpu_to_node(i), &sd->span); |
6900 | sd->parent = p; | 6934 | sd->parent = p; |
6901 | if (p) | 6935 | if (p) |
@@ -6906,6 +6940,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6906 | p = sd; | 6940 | p = sd; |
6907 | sd = &per_cpu(phys_domains, i); | 6941 | sd = &per_cpu(phys_domains, i); |
6908 | SD_INIT(sd, CPU); | 6942 | SD_INIT(sd, CPU); |
6943 | set_domain_attribute(sd, attr); | ||
6909 | sd->span = *nodemask; | 6944 | sd->span = *nodemask; |
6910 | sd->parent = p; | 6945 | sd->parent = p; |
6911 | if (p) | 6946 | if (p) |
@@ -6916,6 +6951,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6916 | p = sd; | 6951 | p = sd; |
6917 | sd = &per_cpu(core_domains, i); | 6952 | sd = &per_cpu(core_domains, i); |
6918 | SD_INIT(sd, MC); | 6953 | SD_INIT(sd, MC); |
6954 | set_domain_attribute(sd, attr); | ||
6919 | sd->span = cpu_coregroup_map(i); | 6955 | sd->span = cpu_coregroup_map(i); |
6920 | cpus_and(sd->span, sd->span, *cpu_map); | 6956 | cpus_and(sd->span, sd->span, *cpu_map); |
6921 | sd->parent = p; | 6957 | sd->parent = p; |
@@ -6927,6 +6963,7 @@ static int build_sched_domains(const cpumask_t *cpu_map) | |||
6927 | p = sd; | 6963 | p = sd; |
6928 | sd = &per_cpu(cpu_domains, i); | 6964 | sd = &per_cpu(cpu_domains, i); |
6929 | SD_INIT(sd, SIBLING); | 6965 | SD_INIT(sd, SIBLING); |
6966 | set_domain_attribute(sd, attr); | ||
6930 | sd->span = per_cpu(cpu_sibling_map, i); | 6967 | sd->span = per_cpu(cpu_sibling_map, i); |
6931 | cpus_and(sd->span, sd->span, *cpu_map); | 6968 | cpus_and(sd->span, sd->span, *cpu_map); |
6932 | sd->parent = p; | 6969 | sd->parent = p; |
@@ -7124,8 +7161,15 @@ error: | |||
7124 | #endif | 7161 | #endif |
7125 | } | 7162 | } |
7126 | 7163 | ||
7164 | static int build_sched_domains(const cpumask_t *cpu_map) | ||
7165 | { | ||
7166 | return __build_sched_domains(cpu_map, NULL); | ||
7167 | } | ||
7168 | |||
7127 | static cpumask_t *doms_cur; /* current sched domains */ | 7169 | static cpumask_t *doms_cur; /* current sched domains */ |
7128 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | 7170 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ |
7171 | static struct sched_domain_attr *dattr_cur; /* attributes of custom domains | ||
7172 | in 'doms_cur' */ | ||
7129 | 7173 | ||
7130 | /* | 7174 | /* |
7131 | * Special case: If a kmalloc of a doms_cur partition (array of | 7175 | * Special case: If a kmalloc of a doms_cur partition (array of |
@@ -7153,6 +7197,7 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map) | |||
7153 | if (!doms_cur) | 7197 | if (!doms_cur) |
7154 | doms_cur = &fallback_doms; | 7198 | doms_cur = &fallback_doms; |
7155 | cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map); | 7199 | cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map); |
7200 | dattr_cur = NULL; | ||
7156 | err = build_sched_domains(doms_cur); | 7201 | err = build_sched_domains(doms_cur); |
7157 | register_sched_domain_sysctl(); | 7202 | register_sched_domain_sysctl(); |
7158 | 7203 | ||
@@ -7182,6 +7227,22 @@ static void detach_destroy_domains(const cpumask_t *cpu_map) | |||
7182 | arch_destroy_sched_domains(cpu_map, &tmpmask); | 7227 | arch_destroy_sched_domains(cpu_map, &tmpmask); |
7183 | } | 7228 | } |
7184 | 7229 | ||
7230 | /* handle null as "default" */ | ||
7231 | static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | ||
7232 | struct sched_domain_attr *new, int idx_new) | ||
7233 | { | ||
7234 | struct sched_domain_attr tmp; | ||
7235 | |||
7236 | /* fast path */ | ||
7237 | if (!new && !cur) | ||
7238 | return 1; | ||
7239 | |||
7240 | tmp = SD_ATTR_INIT; | ||
7241 | return !memcmp(cur ? (cur + idx_cur) : &tmp, | ||
7242 | new ? (new + idx_new) : &tmp, | ||
7243 | sizeof(struct sched_domain_attr)); | ||
7244 | } | ||
7245 | |||
7185 | /* | 7246 | /* |
7186 | * Partition sched domains as specified by the 'ndoms_new' | 7247 | * Partition sched domains as specified by the 'ndoms_new' |
7187 | * cpumasks in the array doms_new[] of cpumasks. This compares | 7248 | * cpumasks in the array doms_new[] of cpumasks. This compares |
@@ -7203,7 +7264,8 @@ static void detach_destroy_domains(const cpumask_t *cpu_map) | |||
7203 | * | 7264 | * |
7204 | * Call with hotplug lock held | 7265 | * Call with hotplug lock held |
7205 | */ | 7266 | */ |
7206 | void partition_sched_domains(int ndoms_new, cpumask_t *doms_new) | 7267 | void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, |
7268 | struct sched_domain_attr *dattr_new) | ||
7207 | { | 7269 | { |
7208 | int i, j; | 7270 | int i, j; |
7209 | 7271 | ||
@@ -7216,12 +7278,14 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new) | |||
7216 | ndoms_new = 1; | 7278 | ndoms_new = 1; |
7217 | doms_new = &fallback_doms; | 7279 | doms_new = &fallback_doms; |
7218 | cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); | 7280 | cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); |
7281 | dattr_new = NULL; | ||
7219 | } | 7282 | } |
7220 | 7283 | ||
7221 | /* Destroy deleted domains */ | 7284 | /* Destroy deleted domains */ |
7222 | for (i = 0; i < ndoms_cur; i++) { | 7285 | for (i = 0; i < ndoms_cur; i++) { |
7223 | for (j = 0; j < ndoms_new; j++) { | 7286 | for (j = 0; j < ndoms_new; j++) { |
7224 | if (cpus_equal(doms_cur[i], doms_new[j])) | 7287 | if (cpus_equal(doms_cur[i], doms_new[j]) |
7288 | && dattrs_equal(dattr_cur, i, dattr_new, j)) | ||
7225 | goto match1; | 7289 | goto match1; |
7226 | } | 7290 | } |
7227 | /* no match - a current sched domain not in new doms_new[] */ | 7291 | /* no match - a current sched domain not in new doms_new[] */ |
@@ -7233,11 +7297,13 @@ match1: | |||
7233 | /* Build new domains */ | 7297 | /* Build new domains */ |
7234 | for (i = 0; i < ndoms_new; i++) { | 7298 | for (i = 0; i < ndoms_new; i++) { |
7235 | for (j = 0; j < ndoms_cur; j++) { | 7299 | for (j = 0; j < ndoms_cur; j++) { |
7236 | if (cpus_equal(doms_new[i], doms_cur[j])) | 7300 | if (cpus_equal(doms_new[i], doms_cur[j]) |
7301 | && dattrs_equal(dattr_new, i, dattr_cur, j)) | ||
7237 | goto match2; | 7302 | goto match2; |
7238 | } | 7303 | } |
7239 | /* no match - add a new doms_new */ | 7304 | /* no match - add a new doms_new */ |
7240 | build_sched_domains(doms_new + i); | 7305 | __build_sched_domains(doms_new + i, |
7306 | dattr_new ? dattr_new + i : NULL); | ||
7241 | match2: | 7307 | match2: |
7242 | ; | 7308 | ; |
7243 | } | 7309 | } |
@@ -7245,7 +7311,9 @@ match2: | |||
7245 | /* Remember the new sched domains */ | 7311 | /* Remember the new sched domains */ |
7246 | if (doms_cur != &fallback_doms) | 7312 | if (doms_cur != &fallback_doms) |
7247 | kfree(doms_cur); | 7313 | kfree(doms_cur); |
7314 | kfree(dattr_cur); /* kfree(NULL) is safe */ | ||
7248 | doms_cur = doms_new; | 7315 | doms_cur = doms_new; |
7316 | dattr_cur = dattr_new; | ||
7249 | ndoms_cur = ndoms_new; | 7317 | ndoms_cur = ndoms_new; |
7250 | 7318 | ||
7251 | register_sched_domain_sysctl(); | 7319 | register_sched_domain_sysctl(); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index de4250c53a19..b43748efaa7f 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -940,7 +940,9 @@ static int wake_idle(int cpu, struct task_struct *p) | |||
940 | return cpu; | 940 | return cpu; |
941 | 941 | ||
942 | for_each_domain(cpu, sd) { | 942 | for_each_domain(cpu, sd) { |
943 | if (sd->flags & SD_WAKE_IDLE) { | 943 | if ((sd->flags & SD_WAKE_IDLE) |
944 | || ((sd->flags & SD_WAKE_IDLE_FAR) | ||
945 | && !task_hot(p, task_rq(p)->clock, sd))) { | ||
944 | cpus_and(tmp, sd->span, p->cpus_allowed); | 946 | cpus_and(tmp, sd->span, p->cpus_allowed); |
945 | for_each_cpu_mask(i, tmp) { | 947 | for_each_cpu_mask(i, tmp) { |
946 | if (idle_cpu(i)) { | 948 | if (idle_cpu(i)) { |