author	Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>	2008-04-15 01:04:23 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-04-19 13:45:00 -0400
commit	1d3504fcf5606579d60b649d19f44b3871c1ddae (patch)
tree	001a1b57bd7f123fd51361d78d9277b2dcac1bf3
parent	4d5f35533fb9b2cd553cec6611195bcbfb7ffd84 (diff)
sched, cpuset: customize sched domains, core
[rebased for sched-devel/latest]

- Add a new cpuset file, having levels: sched_relax_domain_level

- Modify partition_sched_domains() and build_sched_domains()
  to take an attributes parameter passed from cpuset.

- Fill newidle_idx for node domains, which is currently unused but
  might be required if sched_relax_domain_level becomes higher.

- We can change the default level by the boot option 'relax_domain_level='.

Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
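The level values follow the sched_domain_level enum introduced by this patch: a value of N written to the cpuset file (or given as the boot default) enables newidle/wake-idle balancing on every domain whose level is N or below and disables it on wider domains. The sketch below is illustrative only, not part of the patch; the function name relax_example and the per-level comments are an interpretation of how __build_sched_domains() assigns levels, restating the enum and the decision made in set_domain_attribute().

	/* Illustrative sketch only -- not part of the patch. */
	enum sched_domain_level {
		SD_LV_NONE = 0,		/* 0: wake/newidle balancing disabled on all domains */
		SD_LV_SIBLING,		/* 1: search within SMT siblings (cpu_domains)       */
		SD_LV_MC,		/* 2: within a multi-core package (core_domains)     */
		SD_LV_CPU,		/* 3: within the CPUs of one node (phys_domains)     */
		SD_LV_NODE,		/* 4: within a NUMA node span (node_domains)         */
		SD_LV_ALLNODES,		/* 5: across all NUMA nodes (allnodes_domains)       */
		SD_LV_MAX
	};

	/*
	 * What set_domain_attribute() below effectively does for each domain
	 * 'sd' once a level has been requested (via the cpuset file or the
	 * boot option):
	 */
	static void relax_example(struct sched_domain *sd, int request)
	{
		if (request < sd->level)
			/* domain is wider than requested: no idle balancing here */
			sd->flags &= ~(SD_WAKE_IDLE | SD_BALANCE_NEWIDLE);
		else
			/* domain is within the requested width: search it for an idle CPU */
			sd->flags |= (SD_WAKE_IDLE_FAR | SD_BALANCE_NEWIDLE);
	}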
-rw-r--r--	include/asm-ia64/topology.h	2
-rw-r--r--	include/asm-sh/topology.h	2
-rw-r--r--	include/asm-x86/topology.h	2
-rw-r--r--	include/linux/sched.h	23
-rw-r--r--	kernel/cpuset.c	61
-rw-r--r--	kernel/sched.c	78
-rw-r--r--	kernel/sched_fair.c	4
7 files changed, 161 insertions, 11 deletions
diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h
index f929dde85343..f2f72ef2a897 100644
--- a/include/asm-ia64/topology.h
+++ b/include/asm-ia64/topology.h
@@ -93,7 +93,7 @@ void build_cpu_to_node_map(void);
 	.cache_nice_tries	= 2,			\
 	.busy_idx		= 3,			\
 	.idle_idx		= 2,			\
-	.newidle_idx		= 0, /* unused */	\
+	.newidle_idx		= 2,			\
 	.wake_idx		= 1,			\
 	.forkexec_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
diff --git a/include/asm-sh/topology.h b/include/asm-sh/topology.h
index f402a3b1cfa4..34cdb28e8f44 100644
--- a/include/asm-sh/topology.h
+++ b/include/asm-sh/topology.h
@@ -16,7 +16,7 @@
 	.cache_nice_tries	= 2,			\
 	.busy_idx		= 3,			\
 	.idle_idx		= 2,			\
-	.newidle_idx		= 0,			\
+	.newidle_idx		= 2,			\
 	.wake_idx		= 1,			\
 	.forkexec_idx		= 1,			\
 	.flags			= SD_LOAD_BALANCE	\
diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h
index 9ef74c5d5ad6..22073268b481 100644
--- a/include/asm-x86/topology.h
+++ b/include/asm-x86/topology.h
@@ -147,7 +147,7 @@ extern unsigned long node_remap_size[];
 
 # define SD_CACHE_NICE_TRIES	2
 # define SD_IDLE_IDX		2
-# define SD_NEWIDLE_IDX		0
+# define SD_NEWIDLE_IDX		2
 # define SD_FORKEXEC_IDX	1
 
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ada24022d230..11f47249cdd2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -704,6 +704,7 @@ enum cpu_idle_type {
 #define SD_POWERSAVINGS_BALANCE	256	/* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES	512	/* Domain members share cpu pkg resources */
 #define SD_SERIALIZE		1024	/* Only a single load balancing instance */
+#define SD_WAKE_IDLE_FAR	2048	/* Gain latency sacrificing cache hit */
 
 #define BALANCE_FOR_MC_POWER	\
 	(sched_smt_power_savings ? SD_POWERSAVINGS_BALANCE : 0)
@@ -733,6 +734,24 @@ struct sched_group {
 	u32 reciprocal_cpu_power;
 };
 
+enum sched_domain_level {
+	SD_LV_NONE = 0,
+	SD_LV_SIBLING,
+	SD_LV_MC,
+	SD_LV_CPU,
+	SD_LV_NODE,
+	SD_LV_ALLNODES,
+	SD_LV_MAX
+};
+
+struct sched_domain_attr {
+	int relax_domain_level;
+};
+
+#define SD_ATTR_INIT	(struct sched_domain_attr) {	\
+	.relax_domain_level = -1,			\
+}
+
 struct sched_domain {
 	/* These fields must be setup */
 	struct sched_domain *parent;	/* top domain must be null terminated */
@@ -750,6 +769,7 @@ struct sched_domain {
 	unsigned int wake_idx;
 	unsigned int forkexec_idx;
 	int flags;			/* See SD_* */
+	enum sched_domain_level level;
 
 	/* Runtime fields. */
 	unsigned long last_balance;	/* init to jiffies. units in jiffies */
@@ -789,7 +809,8 @@ struct sched_domain {
 #endif
 };
 
-extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new);
+extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+				    struct sched_domain_attr *dattr_new);
 extern int arch_reinit_sched_domains(void);
 
 #endif	/* CONFIG_SMP */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b0c870b2ac30..8b35fbd8292f 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -98,6 +98,9 @@ struct cpuset {
 	/* partition number for rebuild_sched_domains() */
 	int pn;
 
+	/* for custom sched domain */
+	int relax_domain_level;
+
 	/* used for walking a cpuset heirarchy */
 	struct list_head stack_list;
 };
@@ -478,6 +481,16 @@ static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
 	return cpus_intersects(a->cpus_allowed, b->cpus_allowed);
 }
 
+static void
+update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
+{
+	if (!dattr)
+		return;
+	if (dattr->relax_domain_level < c->relax_domain_level)
+		dattr->relax_domain_level = c->relax_domain_level;
+	return;
+}
+
 /*
  * rebuild_sched_domains()
  *
@@ -553,12 +566,14 @@ static void rebuild_sched_domains(void)
 	int csn;		/* how many cpuset ptrs in csa so far */
 	int i, j, k;		/* indices for partition finding loops */
 	cpumask_t *doms;	/* resulting partition; i.e. sched domains */
+	struct sched_domain_attr *dattr;  /* attributes for custom domains */
 	int ndoms;		/* number of sched domains in result */
 	int nslot;		/* next empty doms[] cpumask_t slot */
 
 	q = NULL;
 	csa = NULL;
 	doms = NULL;
+	dattr = NULL;
 
 	/* Special case for the 99% of systems with one, full, sched domain */
 	if (is_sched_load_balance(&top_cpuset)) {
@@ -566,6 +581,11 @@ static void rebuild_sched_domains(void)
 		doms = kmalloc(sizeof(cpumask_t), GFP_KERNEL);
 		if (!doms)
 			goto rebuild;
+		dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
+		if (dattr) {
+			*dattr = SD_ATTR_INIT;
+			update_domain_attr(dattr, &top_cpuset);
+		}
 		*doms = top_cpuset.cpus_allowed;
 		goto rebuild;
 	}
@@ -622,6 +642,7 @@ restart:
 	doms = kmalloc(ndoms * sizeof(cpumask_t), GFP_KERNEL);
 	if (!doms)
 		goto rebuild;
+	dattr = kmalloc(ndoms * sizeof(struct sched_domain_attr), GFP_KERNEL);
 
 	for (nslot = 0, i = 0; i < csn; i++) {
 		struct cpuset *a = csa[i];
@@ -644,12 +665,15 @@ restart:
 		}
 
 		cpus_clear(*dp);
+		if (dattr)
+			*(dattr + nslot) = SD_ATTR_INIT;
 		for (j = i; j < csn; j++) {
 			struct cpuset *b = csa[j];
 
 			if (apn == b->pn) {
 				cpus_or(*dp, *dp, b->cpus_allowed);
 				b->pn = -1;
+				update_domain_attr(dattr, b);
 			}
 		}
 		nslot++;
@@ -660,7 +684,7 @@ restart:
 rebuild:
 	/* Have scheduler rebuild sched domains */
 	get_online_cpus();
-	partition_sched_domains(ndoms, doms);
+	partition_sched_domains(ndoms, doms, dattr);
 	put_online_cpus();
 
 done:
@@ -668,6 +692,7 @@ done:
 		kfifo_free(q);
 	kfree(csa);
 	/* Don't kfree(doms) -- partition_sched_domains() does that. */
+	/* Don't kfree(dattr) -- partition_sched_domains() does that. */
 }
 
 static inline int started_after_time(struct task_struct *t1,
@@ -1011,6 +1036,21 @@ static int update_memory_pressure_enabled(struct cpuset *cs, char *buf)
 	return 0;
 }
 
+static int update_relax_domain_level(struct cpuset *cs, char *buf)
+{
+	int val = simple_strtol(buf, NULL, 10);
+
+	if (val < 0)
+		val = -1;
+
+	if (val != cs->relax_domain_level) {
+		cs->relax_domain_level = val;
+		rebuild_sched_domains();
+	}
+
+	return 0;
+}
+
 /*
  * update_flag - read a 0 or a 1 in a file and update associated flag
  * bit:	the bit to update (CS_CPU_EXCLUSIVE, CS_MEM_EXCLUSIVE,
@@ -1202,6 +1242,7 @@ typedef enum {
 	FILE_CPU_EXCLUSIVE,
 	FILE_MEM_EXCLUSIVE,
 	FILE_SCHED_LOAD_BALANCE,
+	FILE_SCHED_RELAX_DOMAIN_LEVEL,
 	FILE_MEMORY_PRESSURE_ENABLED,
 	FILE_MEMORY_PRESSURE,
 	FILE_SPREAD_PAGE,
@@ -1256,6 +1297,9 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
 	case FILE_SCHED_LOAD_BALANCE:
 		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, buffer);
 		break;
+	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+		retval = update_relax_domain_level(cs, buffer);
+		break;
 	case FILE_MEMORY_MIGRATE:
 		retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
 		break;
@@ -1354,6 +1398,9 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont,
 	case FILE_SCHED_LOAD_BALANCE:
 		*s++ = is_sched_load_balance(cs) ? '1' : '0';
 		break;
+	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+		s += sprintf(s, "%d", cs->relax_domain_level);
+		break;
 	case FILE_MEMORY_MIGRATE:
 		*s++ = is_memory_migrate(cs) ? '1' : '0';
 		break;
@@ -1424,6 +1471,13 @@ static struct cftype cft_sched_load_balance = {
 	.private = FILE_SCHED_LOAD_BALANCE,
 };
 
+static struct cftype cft_sched_relax_domain_level = {
+	.name = "sched_relax_domain_level",
+	.read = cpuset_common_file_read,
+	.write = cpuset_common_file_write,
+	.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
+};
+
 static struct cftype cft_memory_migrate = {
 	.name = "memory_migrate",
 	.read = cpuset_common_file_read,
@@ -1475,6 +1529,9 @@ static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont)
 		return err;
 	if ((err = cgroup_add_file(cont, ss, &cft_sched_load_balance)) < 0)
 		return err;
+	if ((err = cgroup_add_file(cont, ss,
+					&cft_sched_relax_domain_level)) < 0)
+		return err;
 	if ((err = cgroup_add_file(cont, ss, &cft_memory_pressure)) < 0)
 		return err;
 	if ((err = cgroup_add_file(cont, ss, &cft_spread_page)) < 0)
@@ -1559,6 +1616,7 @@ static struct cgroup_subsys_state *cpuset_create(
 	nodes_clear(cs->mems_allowed);
 	cs->mems_generation = cpuset_mems_generation++;
 	fmeter_init(&cs->fmeter);
+	cs->relax_domain_level = -1;
 
 	cs->parent = parent;
 	number_of_cpusets++;
@@ -1631,6 +1689,7 @@ int __init cpuset_init(void)
 	fmeter_init(&top_cpuset.fmeter);
 	top_cpuset.mems_generation = cpuset_mems_generation++;
 	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
+	top_cpuset.relax_domain_level = -1;
 
 	err = register_filesystem(&cpuset_fs_type);
 	if (err < 0)
diff --git a/kernel/sched.c b/kernel/sched.c
index 475e3fcab738..62d7481caca5 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6771,6 +6771,7 @@ static noinline void sd_init_##type(struct sched_domain *sd) \
 {									\
 	memset(sd, 0, sizeof(*sd));					\
 	*sd = SD_##type##_INIT;						\
+	sd->level = SD_LV_##type;					\
 }
 
 SD_INIT_FUNC(CPU)
@@ -6819,11 +6820,42 @@ struct allmasks {
 #define	SCHED_CPUMASK_VAR(v, a) 	cpumask_t *v = (cpumask_t *)	\
 			((unsigned long)(a) + offsetof(struct allmasks, v))
 
+static int default_relax_domain_level = -1;
+
+static int __init setup_relax_domain_level(char *str)
+{
+	default_relax_domain_level = simple_strtoul(str, NULL, 0);
+	return 1;
+}
+__setup("relax_domain_level=", setup_relax_domain_level);
+
+static void set_domain_attribute(struct sched_domain *sd,
+				 struct sched_domain_attr *attr)
+{
+	int request;
+
+	if (!attr || attr->relax_domain_level < 0) {
+		if (default_relax_domain_level < 0)
+			return;
+		else
+			request = default_relax_domain_level;
+	} else
+		request = attr->relax_domain_level;
+	if (request < sd->level) {
+		/* turn off idle balance on this domain */
+		sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE);
+	} else {
+		/* turn on idle balance on this domain */
+		sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE);
+	}
+}
+
 /*
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
-static int build_sched_domains(const cpumask_t *cpu_map)
+static int __build_sched_domains(const cpumask_t *cpu_map,
+				 struct sched_domain_attr *attr)
 {
 	int i;
 	struct root_domain *rd;
@@ -6887,6 +6919,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		    SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) {
 			sd = &per_cpu(allnodes_domains, i);
 			SD_INIT(sd, ALLNODES);
+			set_domain_attribute(sd, attr);
 			sd->span = *cpu_map;
 			cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
 			p = sd;
@@ -6896,6 +6929,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 
 		sd = &per_cpu(node_domains, i);
 		SD_INIT(sd, NODE);
+		set_domain_attribute(sd, attr);
 		sched_domain_node_span(cpu_to_node(i), &sd->span);
 		sd->parent = p;
 		if (p)
@@ -6906,6 +6940,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		p = sd;
 		sd = &per_cpu(phys_domains, i);
 		SD_INIT(sd, CPU);
+		set_domain_attribute(sd, attr);
 		sd->span = *nodemask;
 		sd->parent = p;
 		if (p)
@@ -6916,6 +6951,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		p = sd;
 		sd = &per_cpu(core_domains, i);
 		SD_INIT(sd, MC);
+		set_domain_attribute(sd, attr);
 		sd->span = cpu_coregroup_map(i);
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
@@ -6927,6 +6963,7 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 		p = sd;
 		sd = &per_cpu(cpu_domains, i);
 		SD_INIT(sd, SIBLING);
+		set_domain_attribute(sd, attr);
 		sd->span = per_cpu(cpu_sibling_map, i);
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
@@ -7124,8 +7161,15 @@ error:
 #endif
 }
 
+static int build_sched_domains(const cpumask_t *cpu_map)
+{
+	return __build_sched_domains(cpu_map, NULL);
+}
+
 static cpumask_t *doms_cur;	/* current sched domains */
 static int ndoms_cur;		/* number of sched domains in 'doms_cur' */
+static struct sched_domain_attr *dattr_cur;	/* attribues of custom
+						   domains in 'doms_cur' */
 
 /*
  * Special case: If a kmalloc of a doms_cur partition (array of
@@ -7153,6 +7197,7 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map)
 	if (!doms_cur)
 		doms_cur = &fallback_doms;
 	cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map);
+	dattr_cur = NULL;
 	err = build_sched_domains(doms_cur);
 	register_sched_domain_sysctl();
 
@@ -7182,6 +7227,22 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	arch_destroy_sched_domains(cpu_map, &tmpmask);
 }
 
+/* handle null as "default" */
+static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
+			struct sched_domain_attr *new, int idx_new)
+{
+	struct sched_domain_attr tmp;
+
+	/* fast path */
+	if (!new && !cur)
+		return 1;
+
+	tmp = SD_ATTR_INIT;
+	return !memcmp(cur ? (cur + idx_cur) : &tmp,
+			new ? (new + idx_new) : &tmp,
+			sizeof(struct sched_domain_attr));
+}
+
 /*
  * Partition sched domains as specified by the 'ndoms_new'
  * cpumasks in the array doms_new[] of cpumasks.  This compares
@@ -7203,7 +7264,8 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
  *
  * Call with hotplug lock held
  */
-void partition_sched_domains(int ndoms_new, cpumask_t *doms_new)
+void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+			     struct sched_domain_attr *dattr_new)
 {
 	int i, j;
 
@@ -7216,12 +7278,14 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new)
 		ndoms_new = 1;
 		doms_new = &fallback_doms;
 		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+		dattr_new = NULL;
 	}
 
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
 		for (j = 0; j < ndoms_new; j++) {
-			if (cpus_equal(doms_cur[i], doms_new[j]))
+			if (cpus_equal(doms_cur[i], doms_new[j])
+			    && dattrs_equal(dattr_cur, i, dattr_new, j))
 				goto match1;
 		}
 		/* no match - a current sched domain not in new doms_new[] */
@@ -7233,11 +7297,13 @@ match1:
 	/* Build new domains */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < ndoms_cur; j++) {
-			if (cpus_equal(doms_new[i], doms_cur[j]))
+			if (cpus_equal(doms_new[i], doms_cur[j])
+			    && dattrs_equal(dattr_new, i, dattr_cur, j))
 				goto match2;
 		}
 		/* no match - add a new doms_new */
-		build_sched_domains(doms_new + i);
+		__build_sched_domains(doms_new + i,
+					dattr_new ? dattr_new + i : NULL);
 match2:
 		;
 	}
@@ -7245,7 +7311,9 @@ match2:
 	/* Remember the new sched domains */
 	if (doms_cur != &fallback_doms)
 		kfree(doms_cur);
+	kfree(dattr_cur);	/* kfree(NULL) is safe */
 	doms_cur = doms_new;
+	dattr_cur = dattr_new;
 	ndoms_cur = ndoms_new;
 
 	register_sched_domain_sysctl();
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index de4250c53a19..b43748efaa7f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -940,7 +940,9 @@ static int wake_idle(int cpu, struct task_struct *p)
 		return cpu;
 
 	for_each_domain(cpu, sd) {
-		if (sd->flags & SD_WAKE_IDLE) {
+		if ((sd->flags & SD_WAKE_IDLE)
+		    || ((sd->flags & SD_WAKE_IDLE_FAR)
+			&& !task_hot(p, task_rq(p)->clock, sd))) {
 			cpus_and(tmp, sd->span, p->cpus_allowed);
 			for_each_cpu_mask(i, tmp) {
 				if (idle_cpu(i)) {