-rw-r--r--  Documentation/feature-removal-schedule.txt |  15
-rw-r--r--  Documentation/filesystems/proc.txt         |   3
-rw-r--r--  Documentation/kernel-parameters.txt        |   2
-rw-r--r--  arch/x86/kernel/process_64.c               |   5
-rw-r--r--  fs/proc/array.c                            |  11
-rw-r--r--  fs/proc/stat.c                             |  19
-rw-r--r--  include/linux/kernel_stat.h                |   1
-rw-r--r--  include/linux/sched.h                      |  12
-rw-r--r--  kernel/cpuset.c                            |  27
-rw-r--r--  kernel/kgdb.c                              |   2
-rw-r--r--  kernel/sched.c                             | 180
-rw-r--r--  kernel/sched_debug.c                       |   4
-rw-r--r--  kernel/sched_fair.c                        |  65
-rw-r--r--  kernel/sched_rt.c                          |  61
14 files changed, 263 insertions, 144 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index bc693fffabe0..f613df8ec7bf 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -6,6 +6,21 @@ be removed from this file. | |||
6 | 6 | ||
7 | --------------------------- | 7 | --------------------------- |
8 | 8 | ||
9 | What: USER_SCHED | ||
10 | When: 2.6.34 | ||
11 | |||
12 | Why: USER_SCHED was implemented as a proof of concept for group scheduling. | ||
13 | The effect of USER_SCHED can already be achieved from userspace with | ||
14 | the help of libcgroup. The removal of USER_SCHED will also simplify | ||
15 | the scheduler code with the removal of one major ifdef. There are also | ||
16 | issues USER_SCHED has with USER_NS. A decision was taken not to fix | ||
17 | those and instead remove USER_SCHED. Also new group scheduling | ||
18 | features will not be implemented for USER_SCHED. | ||
19 | |||
20 | Who: Dhaval Giani <dhaval@linux.vnet.ibm.com> | ||
21 | |||
22 | --------------------------- | ||
23 | |||
9 | What: PRISM54 | 24 | What: PRISM54 |
10 | When: 2.6.34 | 25 | When: 2.6.34 |
11 | 26 | ||
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index 2c48f945546b..4af0018533f2 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1072,7 +1072,8 @@ second). The meanings of the columns are as follows, from left to right: | |||
1072 | - irq: servicing interrupts | 1072 | - irq: servicing interrupts |
1073 | - softirq: servicing softirqs | 1073 | - softirq: servicing softirqs |
1074 | - steal: involuntary wait | 1074 | - steal: involuntary wait |
1075 | - guest: running a guest | 1075 | - guest: running a normal guest |
1076 | - guest_nice: running a niced guest | ||
1076 | 1077 | ||
1077 | The "intr" line gives counts of interrupts serviced since boot time, for each | 1078 | The "intr" line gives counts of interrupts serviced since boot time, for each |
1078 | of the possible system interrupts. The first column is the total of all | 1079 | of the possible system interrupts. The first column is the total of all |
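With guest_nice added, the aggregate and per-cpu lines in /proc/stat carry ten counters, in the order user, nice, system, idle, iowait, irq, softirq, steal, guest, guest_nice. A hypothetical aggregate line (values invented for illustration) would read:

        cpu  4705 356 584 3699176 23060 3 734 0 12 5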
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9107b387e91f..f2a9507b27b2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2182,6 +2182,8 @@ and is between 256 and 4096 characters. It is defined in the file | |||
2182 | 2182 | ||
2183 | sbni= [NET] Granch SBNI12 leased line adapter | 2183 | sbni= [NET] Granch SBNI12 leased line adapter |
2184 | 2184 | ||
2185 | sched_debug [KNL] Enables verbose scheduler debug messages. | ||
2186 | |||
2185 | sc1200wdt= [HW,WDT] SC1200 WDT (watchdog) driver | 2187 | sc1200wdt= [HW,WDT] SC1200 WDT (watchdog) driver |
2186 | Format: <io>[,<timeout>[,<isapnp>]] | 2188 | Format: <io>[,<timeout>[,<isapnp>]] |
2187 | 2189 | ||
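The option is a bare flag: appending "sched_debug" to the kernel command line (for example "... root=/dev/sda1 ro sched_debug", an illustrative line) enables the verbose dump of the sched-domain hierarchy whenever the domains are built or rebuilt.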
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eb62cbcaa490..93c501dc2496 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -406,11 +406,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
406 | * This won't pick up thread selector changes, but I guess that is ok. | 406 | * This won't pick up thread selector changes, but I guess that is ok. |
407 | */ | 407 | */ |
408 | savesegment(es, prev->es); | 408 | savesegment(es, prev->es); |
409 | if (unlikely(next->es | prev->es)) | 409 | if (next->es | prev->es) |
410 | loadsegment(es, next->es); | 410 | loadsegment(es, next->es); |
411 | |||
412 | savesegment(ds, prev->ds); | 411 | savesegment(ds, prev->ds); |
413 | if (unlikely(next->ds | prev->ds)) | 412 | if (next->ds | prev->ds) |
414 | loadsegment(ds, next->ds); | 413 | loadsegment(ds, next->ds); |
415 | 414 | ||
416 | 415 | ||
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 07f77a7945c3..e209f64ab27b 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -410,6 +410,16 @@ static void task_show_stack_usage(struct seq_file *m, struct task_struct *task) | |||
410 | } | 410 | } |
411 | #endif /* CONFIG_MMU */ | 411 | #endif /* CONFIG_MMU */ |
412 | 412 | ||
413 | static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) | ||
414 | { | ||
415 | seq_printf(m, "Cpus_allowed:\t"); | ||
416 | seq_cpumask(m, &task->cpus_allowed); | ||
417 | seq_printf(m, "\n"); | ||
418 | seq_printf(m, "Cpus_allowed_list:\t"); | ||
419 | seq_cpumask_list(m, &task->cpus_allowed); | ||
420 | seq_printf(m, "\n"); | ||
421 | } | ||
422 | |||
413 | int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | 423 | int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, |
414 | struct pid *pid, struct task_struct *task) | 424 | struct pid *pid, struct task_struct *task) |
415 | { | 425 | { |
@@ -424,6 +434,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | |||
424 | } | 434 | } |
425 | task_sig(m, task); | 435 | task_sig(m, task); |
426 | task_cap(m, task); | 436 | task_cap(m, task); |
437 | task_cpus_allowed(m, task); | ||
427 | cpuset_task_status_allowed(m, task); | 438 | cpuset_task_status_allowed(m, task); |
428 | #if defined(CONFIG_S390) | 439 | #if defined(CONFIG_S390) |
429 | task_show_regs(m, task); | 440 | task_show_regs(m, task); |
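Moving the helper into fs/proc/array.c does not change the output format, only where it is generated; for a task confined to CPUs 0-3 on an 8-CPU machine, /proc/<pid>/status would still contain lines along these lines (mask width varies with the configured number of CPUs; values illustrative):

        Cpus_allowed:       0f
        Cpus_allowed_list:  0-3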
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 7cc726c6d70a..b9b7aad2003d 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -27,7 +27,7 @@ static int show_stat(struct seq_file *p, void *v) | |||
27 | int i, j; | 27 | int i, j; |
28 | unsigned long jif; | 28 | unsigned long jif; |
29 | cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; | 29 | cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; |
30 | cputime64_t guest; | 30 | cputime64_t guest, guest_nice; |
31 | u64 sum = 0; | 31 | u64 sum = 0; |
32 | u64 sum_softirq = 0; | 32 | u64 sum_softirq = 0; |
33 | unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; | 33 | unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; |
@@ -36,7 +36,7 @@ static int show_stat(struct seq_file *p, void *v) | |||
36 | 36 | ||
37 | user = nice = system = idle = iowait = | 37 | user = nice = system = idle = iowait = |
38 | irq = softirq = steal = cputime64_zero; | 38 | irq = softirq = steal = cputime64_zero; |
39 | guest = cputime64_zero; | 39 | guest = guest_nice = cputime64_zero; |
40 | getboottime(&boottime); | 40 | getboottime(&boottime); |
41 | jif = boottime.tv_sec; | 41 | jif = boottime.tv_sec; |
42 | 42 | ||
@@ -51,6 +51,8 @@ static int show_stat(struct seq_file *p, void *v) | |||
51 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); | 51 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); |
52 | steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); | 52 | steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); |
53 | guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); | 53 | guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); |
54 | guest_nice = cputime64_add(guest_nice, | ||
55 | kstat_cpu(i).cpustat.guest_nice); | ||
54 | for_each_irq_nr(j) { | 56 | for_each_irq_nr(j) { |
55 | sum += kstat_irqs_cpu(j, i); | 57 | sum += kstat_irqs_cpu(j, i); |
56 | } | 58 | } |
@@ -65,7 +67,8 @@ static int show_stat(struct seq_file *p, void *v) | |||
65 | } | 67 | } |
66 | sum += arch_irq_stat(); | 68 | sum += arch_irq_stat(); |
67 | 69 | ||
68 | seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", | 70 | seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu " |
71 | "%llu\n", | ||
69 | (unsigned long long)cputime64_to_clock_t(user), | 72 | (unsigned long long)cputime64_to_clock_t(user), |
70 | (unsigned long long)cputime64_to_clock_t(nice), | 73 | (unsigned long long)cputime64_to_clock_t(nice), |
71 | (unsigned long long)cputime64_to_clock_t(system), | 74 | (unsigned long long)cputime64_to_clock_t(system), |
@@ -74,7 +77,8 @@ static int show_stat(struct seq_file *p, void *v) | |||
74 | (unsigned long long)cputime64_to_clock_t(irq), | 77 | (unsigned long long)cputime64_to_clock_t(irq), |
75 | (unsigned long long)cputime64_to_clock_t(softirq), | 78 | (unsigned long long)cputime64_to_clock_t(softirq), |
76 | (unsigned long long)cputime64_to_clock_t(steal), | 79 | (unsigned long long)cputime64_to_clock_t(steal), |
77 | (unsigned long long)cputime64_to_clock_t(guest)); | 80 | (unsigned long long)cputime64_to_clock_t(guest), |
81 | (unsigned long long)cputime64_to_clock_t(guest_nice)); | ||
78 | for_each_online_cpu(i) { | 82 | for_each_online_cpu(i) { |
79 | 83 | ||
80 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ | 84 | /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ |
@@ -88,8 +92,10 @@ static int show_stat(struct seq_file *p, void *v) | |||
88 | softirq = kstat_cpu(i).cpustat.softirq; | 92 | softirq = kstat_cpu(i).cpustat.softirq; |
89 | steal = kstat_cpu(i).cpustat.steal; | 93 | steal = kstat_cpu(i).cpustat.steal; |
90 | guest = kstat_cpu(i).cpustat.guest; | 94 | guest = kstat_cpu(i).cpustat.guest; |
95 | guest_nice = kstat_cpu(i).cpustat.guest_nice; | ||
91 | seq_printf(p, | 96 | seq_printf(p, |
92 | "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", | 97 | "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu " |
98 | "%llu\n", | ||
93 | i, | 99 | i, |
94 | (unsigned long long)cputime64_to_clock_t(user), | 100 | (unsigned long long)cputime64_to_clock_t(user), |
95 | (unsigned long long)cputime64_to_clock_t(nice), | 101 | (unsigned long long)cputime64_to_clock_t(nice), |
@@ -99,7 +105,8 @@ static int show_stat(struct seq_file *p, void *v) | |||
99 | (unsigned long long)cputime64_to_clock_t(irq), | 105 | (unsigned long long)cputime64_to_clock_t(irq), |
100 | (unsigned long long)cputime64_to_clock_t(softirq), | 106 | (unsigned long long)cputime64_to_clock_t(softirq), |
101 | (unsigned long long)cputime64_to_clock_t(steal), | 107 | (unsigned long long)cputime64_to_clock_t(steal), |
102 | (unsigned long long)cputime64_to_clock_t(guest)); | 108 | (unsigned long long)cputime64_to_clock_t(guest), |
109 | (unsigned long long)cputime64_to_clock_t(guest_nice)); | ||
103 | } | 110 | } |
104 | seq_printf(p, "intr %llu", (unsigned long long)sum); | 111 | seq_printf(p, "intr %llu", (unsigned long long)sum); |
105 | 112 | ||
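For user space, guest_nice simply appears as a tenth field on each cpu line. A minimal sketch of a reader (not part of this patch; it assumes the field order documented in proc.txt above and a kernel new enough to print all ten fields):

        #include <stdio.h>

        int main(void)
        {
                unsigned long long user, nice, system, idle, iowait;
                unsigned long long irq, softirq, steal, guest, guest_nice;
                FILE *f = fopen("/proc/stat", "r");

                if (!f)
                        return 1;
                /* first line: aggregate "cpu" counters, in USER_HZ ticks */
                if (fscanf(f, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu",
                           &user, &nice, &system, &idle, &iowait,
                           &irq, &softirq, &steal, &guest, &guest_nice) == 10)
                        printf("guest=%llu guest_nice=%llu\n", guest, guest_nice);
                fclose(f);
                return 0;
        }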
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 348fa8874b52..c059044bc6dc 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -25,6 +25,7 @@ struct cpu_usage_stat { | |||
25 | cputime64_t iowait; | 25 | cputime64_t iowait; |
26 | cputime64_t steal; | 26 | cputime64_t steal; |
27 | cputime64_t guest; | 27 | cputime64_t guest; |
28 | cputime64_t guest_nice; | ||
28 | }; | 29 | }; |
29 | 30 | ||
30 | struct kernel_stat { | 31 | struct kernel_stat { |
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 75e6e60bf583..78ba664474f3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void); | |||
145 | 145 | ||
146 | 146 | ||
147 | extern void calc_global_load(void); | 147 | extern void calc_global_load(void); |
148 | extern u64 cpu_nr_migrations(int cpu); | ||
149 | 148 | ||
150 | extern unsigned long get_parent_ip(unsigned long addr); | 149 | extern unsigned long get_parent_ip(unsigned long addr); |
151 | 150 | ||
@@ -171,8 +170,6 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
171 | } | 170 | } |
172 | #endif | 171 | #endif |
173 | 172 | ||
174 | extern unsigned long long time_sync_thresh; | ||
175 | |||
176 | /* | 173 | /* |
177 | * Task state bitmask. NOTE! These bits are also | 174 | * Task state bitmask. NOTE! These bits are also |
178 | * encoded in fs/proc/array.c: get_task_state(). | 175 | * encoded in fs/proc/array.c: get_task_state(). |
@@ -349,7 +346,6 @@ extern signed long schedule_timeout(signed long timeout); | |||
349 | extern signed long schedule_timeout_interruptible(signed long timeout); | 346 | extern signed long schedule_timeout_interruptible(signed long timeout); |
350 | extern signed long schedule_timeout_killable(signed long timeout); | 347 | extern signed long schedule_timeout_killable(signed long timeout); |
351 | extern signed long schedule_timeout_uninterruptible(signed long timeout); | 348 | extern signed long schedule_timeout_uninterruptible(signed long timeout); |
352 | asmlinkage void __schedule(void); | ||
353 | asmlinkage void schedule(void); | 349 | asmlinkage void schedule(void); |
354 | extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); | 350 | extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); |
355 | 351 | ||
@@ -1013,9 +1009,13 @@ static inline struct cpumask *sched_domain_span(struct sched_domain *sd) | |||
1013 | return to_cpumask(sd->span); | 1009 | return to_cpumask(sd->span); |
1014 | } | 1010 | } |
1015 | 1011 | ||
1016 | extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | 1012 | extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
1017 | struct sched_domain_attr *dattr_new); | 1013 | struct sched_domain_attr *dattr_new); |
1018 | 1014 | ||
1015 | /* Allocate an array of sched domains, for partition_sched_domains(). */ | ||
1016 | cpumask_var_t *alloc_sched_domains(unsigned int ndoms); | ||
1017 | void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms); | ||
1018 | |||
1019 | /* Test a flag in parent sched domain */ | 1019 | /* Test a flag in parent sched domain */ |
1020 | static inline int test_sd_parent(struct sched_domain *sd, int flag) | 1020 | static inline int test_sd_parent(struct sched_domain *sd, int flag) |
1021 | { | 1021 | { |
@@ -1033,7 +1033,7 @@ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); | |||
1033 | struct sched_domain_attr; | 1033 | struct sched_domain_attr; |
1034 | 1034 | ||
1035 | static inline void | 1035 | static inline void |
1036 | partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | 1036 | partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
1037 | struct sched_domain_attr *dattr_new) | 1037 | struct sched_domain_attr *dattr_new) |
1038 | { | 1038 | { |
1039 | } | 1039 | } |
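The switch from 'struct cpumask *' arrays to 'cpumask_var_t' arrays lets each domain mask live off-stack when NR_CPUS is large; cpumask_var_t is, roughly (per include/linux/cpumask.h):

        #ifdef CONFIG_CPUMASK_OFFSTACK
        typedef struct cpumask *cpumask_var_t;   /* separately allocated mask */
        #else
        typedef struct cpumask cpumask_var_t[1]; /* embedded, array-of-one trick */
        #endif

so doms_new[i] behaves as a pointer-like handle in both configurations, which is what the new alloc_sched_domains()/free_sched_domains() helpers hand out.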
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index b5cb469d2545..3cf2183b472d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -537,8 +537,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) | |||
537 | * element of the partition (one sched domain) to be passed to | 537 | * element of the partition (one sched domain) to be passed to |
538 | * partition_sched_domains(). | 538 | * partition_sched_domains(). |
539 | */ | 539 | */ |
540 | /* FIXME: see the FIXME in partition_sched_domains() */ | 540 | static int generate_sched_domains(cpumask_var_t **domains, |
541 | static int generate_sched_domains(struct cpumask **domains, | ||
542 | struct sched_domain_attr **attributes) | 541 | struct sched_domain_attr **attributes) |
543 | { | 542 | { |
544 | LIST_HEAD(q); /* queue of cpusets to be scanned */ | 543 | LIST_HEAD(q); /* queue of cpusets to be scanned */ |
@@ -546,7 +545,7 @@ static int generate_sched_domains(struct cpumask **domains, | |||
546 | struct cpuset **csa; /* array of all cpuset ptrs */ | 545 | struct cpuset **csa; /* array of all cpuset ptrs */ |
547 | int csn; /* how many cpuset ptrs in csa so far */ | 546 | int csn; /* how many cpuset ptrs in csa so far */ |
548 | int i, j, k; /* indices for partition finding loops */ | 547 | int i, j, k; /* indices for partition finding loops */ |
549 | struct cpumask *doms; /* resulting partition; i.e. sched domains */ | 548 | cpumask_var_t *doms; /* resulting partition; i.e. sched domains */ |
550 | struct sched_domain_attr *dattr; /* attributes for custom domains */ | 549 | struct sched_domain_attr *dattr; /* attributes for custom domains */ |
551 | int ndoms = 0; /* number of sched domains in result */ | 550 | int ndoms = 0; /* number of sched domains in result */ |
552 | int nslot; /* next empty doms[] struct cpumask slot */ | 551 | int nslot; /* next empty doms[] struct cpumask slot */ |
@@ -557,7 +556,8 @@ static int generate_sched_domains(struct cpumask **domains, | |||
557 | 556 | ||
558 | /* Special case for the 99% of systems with one, full, sched domain */ | 557 | /* Special case for the 99% of systems with one, full, sched domain */ |
559 | if (is_sched_load_balance(&top_cpuset)) { | 558 | if (is_sched_load_balance(&top_cpuset)) { |
560 | doms = kmalloc(cpumask_size(), GFP_KERNEL); | 559 | ndoms = 1; |
560 | doms = alloc_sched_domains(ndoms); | ||
561 | if (!doms) | 561 | if (!doms) |
562 | goto done; | 562 | goto done; |
563 | 563 | ||
@@ -566,9 +566,8 @@ static int generate_sched_domains(struct cpumask **domains, | |||
566 | *dattr = SD_ATTR_INIT; | 566 | *dattr = SD_ATTR_INIT; |
567 | update_domain_attr_tree(dattr, &top_cpuset); | 567 | update_domain_attr_tree(dattr, &top_cpuset); |
568 | } | 568 | } |
569 | cpumask_copy(doms, top_cpuset.cpus_allowed); | 569 | cpumask_copy(doms[0], top_cpuset.cpus_allowed); |
570 | 570 | ||
571 | ndoms = 1; | ||
572 | goto done; | 571 | goto done; |
573 | } | 572 | } |
574 | 573 | ||
@@ -636,7 +635,7 @@ restart: | |||
636 | * Now we know how many domains to create. | 635 | * Now we know how many domains to create. |
637 | * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. | 636 | * Convert <csn, csa> to <ndoms, doms> and populate cpu masks. |
638 | */ | 637 | */ |
639 | doms = kmalloc(ndoms * cpumask_size(), GFP_KERNEL); | 638 | doms = alloc_sched_domains(ndoms); |
640 | if (!doms) | 639 | if (!doms) |
641 | goto done; | 640 | goto done; |
642 | 641 | ||
@@ -656,7 +655,7 @@ restart: | |||
656 | continue; | 655 | continue; |
657 | } | 656 | } |
658 | 657 | ||
659 | dp = doms + nslot; | 658 | dp = doms[nslot]; |
660 | 659 | ||
661 | if (nslot == ndoms) { | 660 | if (nslot == ndoms) { |
662 | static int warnings = 10; | 661 | static int warnings = 10; |
@@ -718,7 +717,7 @@ done: | |||
718 | static void do_rebuild_sched_domains(struct work_struct *unused) | 717 | static void do_rebuild_sched_domains(struct work_struct *unused) |
719 | { | 718 | { |
720 | struct sched_domain_attr *attr; | 719 | struct sched_domain_attr *attr; |
721 | struct cpumask *doms; | 720 | cpumask_var_t *doms; |
722 | int ndoms; | 721 | int ndoms; |
723 | 722 | ||
724 | get_online_cpus(); | 723 | get_online_cpus(); |
@@ -2052,7 +2051,7 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb, | |||
2052 | unsigned long phase, void *unused_cpu) | 2051 | unsigned long phase, void *unused_cpu) |
2053 | { | 2052 | { |
2054 | struct sched_domain_attr *attr; | 2053 | struct sched_domain_attr *attr; |
2055 | struct cpumask *doms; | 2054 | cpumask_var_t *doms; |
2056 | int ndoms; | 2055 | int ndoms; |
2057 | 2056 | ||
2058 | switch (phase) { | 2057 | switch (phase) { |
@@ -2537,15 +2536,9 @@ const struct file_operations proc_cpuset_operations = { | |||
2537 | }; | 2536 | }; |
2538 | #endif /* CONFIG_PROC_PID_CPUSET */ | 2537 | #endif /* CONFIG_PROC_PID_CPUSET */ |
2539 | 2538 | ||
2540 | /* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */ | 2539 | /* Display task mems_allowed in /proc/<pid>/status file. */ |
2541 | void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) | 2540 | void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) |
2542 | { | 2541 | { |
2543 | seq_printf(m, "Cpus_allowed:\t"); | ||
2544 | seq_cpumask(m, &task->cpus_allowed); | ||
2545 | seq_printf(m, "\n"); | ||
2546 | seq_printf(m, "Cpus_allowed_list:\t"); | ||
2547 | seq_cpumask_list(m, &task->cpus_allowed); | ||
2548 | seq_printf(m, "\n"); | ||
2549 | seq_printf(m, "Mems_allowed:\t"); | 2542 | seq_printf(m, "Mems_allowed:\t"); |
2550 | seq_nodemask(m, &task->mems_allowed); | 2543 | seq_nodemask(m, &task->mems_allowed); |
2551 | seq_printf(m, "\n"); | 2544 | seq_printf(m, "\n"); |
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 9147a3190c9d..7d7014634022 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -870,7 +870,7 @@ static void gdb_cmd_getregs(struct kgdb_state *ks) | |||
870 | 870 | ||
871 | /* | 871 | /* |
872 | * All threads that don't have debuggerinfo should be | 872 | * All threads that don't have debuggerinfo should be |
873 | * in __schedule() sleeping, since all other CPUs | 873 | * in schedule() sleeping, since all other CPUs |
874 | * are in kgdb_wait, and thus have debuggerinfo. | 874 | * are in kgdb_wait, and thus have debuggerinfo. |
875 | */ | 875 | */ |
876 | if (local_debuggerinfo) { | 876 | if (local_debuggerinfo) { |
diff --git a/kernel/sched.c b/kernel/sched.c
index 3c91f110fc62..315ba4059f93 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -535,14 +535,12 @@ struct rq { | |||
535 | #define CPU_LOAD_IDX_MAX 5 | 535 | #define CPU_LOAD_IDX_MAX 5 |
536 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; | 536 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; |
537 | #ifdef CONFIG_NO_HZ | 537 | #ifdef CONFIG_NO_HZ |
538 | unsigned long last_tick_seen; | ||
539 | unsigned char in_nohz_recently; | 538 | unsigned char in_nohz_recently; |
540 | #endif | 539 | #endif |
541 | /* capture load from *all* tasks on this cpu: */ | 540 | /* capture load from *all* tasks on this cpu: */ |
542 | struct load_weight load; | 541 | struct load_weight load; |
543 | unsigned long nr_load_updates; | 542 | unsigned long nr_load_updates; |
544 | u64 nr_switches; | 543 | u64 nr_switches; |
545 | u64 nr_migrations_in; | ||
546 | 544 | ||
547 | struct cfs_rq cfs; | 545 | struct cfs_rq cfs; |
548 | struct rt_rq rt; | 546 | struct rt_rq rt; |
@@ -591,6 +589,8 @@ struct rq { | |||
591 | 589 | ||
592 | u64 rt_avg; | 590 | u64 rt_avg; |
593 | u64 age_stamp; | 591 | u64 age_stamp; |
592 | u64 idle_stamp; | ||
593 | u64 avg_idle; | ||
594 | #endif | 594 | #endif |
595 | 595 | ||
596 | /* calc_load related fields */ | 596 | /* calc_load related fields */ |
@@ -772,7 +772,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
772 | if (!sched_feat_names[i]) | 772 | if (!sched_feat_names[i]) |
773 | return -EINVAL; | 773 | return -EINVAL; |
774 | 774 | ||
775 | filp->f_pos += cnt; | 775 | *ppos += cnt; |
776 | 776 | ||
777 | return cnt; | 777 | return cnt; |
778 | } | 778 | } |
@@ -2079,7 +2079,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
2079 | #endif | 2079 | #endif |
2080 | if (old_cpu != new_cpu) { | 2080 | if (old_cpu != new_cpu) { |
2081 | p->se.nr_migrations++; | 2081 | p->se.nr_migrations++; |
2082 | new_rq->nr_migrations_in++; | ||
2083 | #ifdef CONFIG_SCHEDSTATS | 2082 | #ifdef CONFIG_SCHEDSTATS |
2084 | if (task_hot(p, old_rq->clock, NULL)) | 2083 | if (task_hot(p, old_rq->clock, NULL)) |
2085 | schedstat_inc(p, se.nr_forced2_migrations); | 2084 | schedstat_inc(p, se.nr_forced2_migrations); |
@@ -2443,6 +2442,17 @@ out_running: | |||
2443 | #ifdef CONFIG_SMP | 2442 | #ifdef CONFIG_SMP |
2444 | if (p->sched_class->task_wake_up) | 2443 | if (p->sched_class->task_wake_up) |
2445 | p->sched_class->task_wake_up(rq, p); | 2444 | p->sched_class->task_wake_up(rq, p); |
2445 | |||
2446 | if (unlikely(rq->idle_stamp)) { | ||
2447 | u64 delta = rq->clock - rq->idle_stamp; | ||
2448 | u64 max = 2*sysctl_sched_migration_cost; | ||
2449 | |||
2450 | if (delta > max) | ||
2451 | rq->avg_idle = max; | ||
2452 | else | ||
2453 | update_avg(&rq->avg_idle, delta); | ||
2454 | rq->idle_stamp = 0; | ||
2455 | } | ||
2446 | #endif | 2456 | #endif |
2447 | out: | 2457 | out: |
2448 | task_rq_unlock(rq, &flags); | 2458 | task_rq_unlock(rq, &flags); |
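rq->avg_idle is a running estimate of how long this CPU stays idle between wakeups. update_avg() is the scheduler's existing helper and is, to the best of my knowledge, a simple 1/8-weight exponential average:

        static void update_avg(u64 *avg, u64 sample)
        {
                s64 diff = sample - *avg;
                *avg += diff >> 3;      /* avg += (sample - avg) / 8 */
        }

The clamp to 2*sysctl_sched_migration_cost keeps one unusually long idle period from inflating the estimate, and idle_balance() below uses avg_idle to decide whether newidle balancing is worth the trouble.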
@@ -2855,14 +2865,14 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2855 | */ | 2865 | */ |
2856 | arch_start_context_switch(prev); | 2866 | arch_start_context_switch(prev); |
2857 | 2867 | ||
2858 | if (unlikely(!mm)) { | 2868 | if (likely(!mm)) { |
2859 | next->active_mm = oldmm; | 2869 | next->active_mm = oldmm; |
2860 | atomic_inc(&oldmm->mm_count); | 2870 | atomic_inc(&oldmm->mm_count); |
2861 | enter_lazy_tlb(oldmm, next); | 2871 | enter_lazy_tlb(oldmm, next); |
2862 | } else | 2872 | } else |
2863 | switch_mm(oldmm, mm, next); | 2873 | switch_mm(oldmm, mm, next); |
2864 | 2874 | ||
2865 | if (unlikely(!prev->mm)) { | 2875 | if (likely(!prev->mm)) { |
2866 | prev->active_mm = NULL; | 2876 | prev->active_mm = NULL; |
2867 | rq->prev_mm = oldmm; | 2877 | rq->prev_mm = oldmm; |
2868 | } | 2878 | } |
@@ -3025,15 +3035,6 @@ static void calc_load_account_active(struct rq *this_rq) | |||
3025 | } | 3035 | } |
3026 | 3036 | ||
3027 | /* | 3037 | /* |
3028 | * Externally visible per-cpu scheduler statistics: | ||
3029 | * cpu_nr_migrations(cpu) - number of migrations into that cpu | ||
3030 | */ | ||
3031 | u64 cpu_nr_migrations(int cpu) | ||
3032 | { | ||
3033 | return cpu_rq(cpu)->nr_migrations_in; | ||
3034 | } | ||
3035 | |||
3036 | /* | ||
3037 | * Update rq->cpu_load[] statistics. This function is usually called every | 3038 | * Update rq->cpu_load[] statistics. This function is usually called every |
3038 | * scheduler tick (TICK_NSEC). | 3039 | * scheduler tick (TICK_NSEC). |
3039 | */ | 3040 | */ |
@@ -4133,7 +4134,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
4133 | unsigned long flags; | 4134 | unsigned long flags; |
4134 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4135 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4135 | 4136 | ||
4136 | cpumask_setall(cpus); | 4137 | cpumask_copy(cpus, cpu_online_mask); |
4137 | 4138 | ||
4138 | /* | 4139 | /* |
4139 | * When power savings policy is enabled for the parent domain, idle | 4140 | * When power savings policy is enabled for the parent domain, idle |
@@ -4296,7 +4297,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
4296 | int all_pinned = 0; | 4297 | int all_pinned = 0; |
4297 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4298 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4298 | 4299 | ||
4299 | cpumask_setall(cpus); | 4300 | cpumask_copy(cpus, cpu_online_mask); |
4300 | 4301 | ||
4301 | /* | 4302 | /* |
4302 | * When power savings policy is enabled for the parent domain, idle | 4303 | * When power savings policy is enabled for the parent domain, idle |
@@ -4436,6 +4437,11 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
4436 | int pulled_task = 0; | 4437 | int pulled_task = 0; |
4437 | unsigned long next_balance = jiffies + HZ; | 4438 | unsigned long next_balance = jiffies + HZ; |
4438 | 4439 | ||
4440 | this_rq->idle_stamp = this_rq->clock; | ||
4441 | |||
4442 | if (this_rq->avg_idle < sysctl_sched_migration_cost) | ||
4443 | return; | ||
4444 | |||
4439 | for_each_domain(this_cpu, sd) { | 4445 | for_each_domain(this_cpu, sd) { |
4440 | unsigned long interval; | 4446 | unsigned long interval; |
4441 | 4447 | ||
@@ -4450,8 +4456,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
4450 | interval = msecs_to_jiffies(sd->balance_interval); | 4456 | interval = msecs_to_jiffies(sd->balance_interval); |
4451 | if (time_after(next_balance, sd->last_balance + interval)) | 4457 | if (time_after(next_balance, sd->last_balance + interval)) |
4452 | next_balance = sd->last_balance + interval; | 4458 | next_balance = sd->last_balance + interval; |
4453 | if (pulled_task) | 4459 | if (pulled_task) { |
4460 | this_rq->idle_stamp = 0; | ||
4454 | break; | 4461 | break; |
4462 | } | ||
4455 | } | 4463 | } |
4456 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { | 4464 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { |
4457 | /* | 4465 | /* |
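Taken together with the wakeup-side hunk above: a CPU whose recent idle periods average out shorter than sysctl_sched_migration_cost (0.5 ms by default, assuming the usual setting) now skips newidle balancing entirely, on the expectation that work will arrive again before a migrated, cache-cold task could pay for its move; and idle_stamp is cleared once a task is actually pulled, so that interval is not counted as idle time by the wakeup path.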
@@ -5053,8 +5061,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, | |||
5053 | p->gtime = cputime_add(p->gtime, cputime); | 5061 | p->gtime = cputime_add(p->gtime, cputime); |
5054 | 5062 | ||
5055 | /* Add guest time to cpustat. */ | 5063 | /* Add guest time to cpustat. */ |
5056 | cpustat->user = cputime64_add(cpustat->user, tmp); | 5064 | if (TASK_NICE(p) > 0) { |
5057 | cpustat->guest = cputime64_add(cpustat->guest, tmp); | 5065 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
5066 | cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp); | ||
5067 | } else { | ||
5068 | cpustat->user = cputime64_add(cpustat->user, tmp); | ||
5069 | cpustat->guest = cputime64_add(cpustat->guest, tmp); | ||
5070 | } | ||
5058 | } | 5071 | } |
5059 | 5072 | ||
5060 | /* | 5073 | /* |
@@ -5179,41 +5192,45 @@ cputime_t task_stime(struct task_struct *p) | |||
5179 | return p->stime; | 5192 | return p->stime; |
5180 | } | 5193 | } |
5181 | #else | 5194 | #else |
5195 | |||
5196 | #ifndef nsecs_to_cputime | ||
5197 | # define nsecs_to_cputime(__nsecs) \ | ||
5198 | msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC)) | ||
5199 | #endif | ||
5200 | |||
5182 | cputime_t task_utime(struct task_struct *p) | 5201 | cputime_t task_utime(struct task_struct *p) |
5183 | { | 5202 | { |
5184 | clock_t utime = cputime_to_clock_t(p->utime), | 5203 | cputime_t utime = p->utime, total = utime + p->stime; |
5185 | total = utime + cputime_to_clock_t(p->stime); | ||
5186 | u64 temp; | 5204 | u64 temp; |
5187 | 5205 | ||
5188 | /* | 5206 | /* |
5189 | * Use CFS's precise accounting: | 5207 | * Use CFS's precise accounting: |
5190 | */ | 5208 | */ |
5191 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | 5209 | temp = (u64)nsecs_to_cputime(p->se.sum_exec_runtime); |
5192 | 5210 | ||
5193 | if (total) { | 5211 | if (total) { |
5194 | temp *= utime; | 5212 | temp *= utime; |
5195 | do_div(temp, total); | 5213 | do_div(temp, total); |
5196 | } | 5214 | } |
5197 | utime = (clock_t)temp; | 5215 | utime = (cputime_t)temp; |
5198 | 5216 | ||
5199 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | 5217 | p->prev_utime = max(p->prev_utime, utime); |
5200 | return p->prev_utime; | 5218 | return p->prev_utime; |
5201 | } | 5219 | } |
5202 | 5220 | ||
5203 | cputime_t task_stime(struct task_struct *p) | 5221 | cputime_t task_stime(struct task_struct *p) |
5204 | { | 5222 | { |
5205 | clock_t stime; | 5223 | cputime_t stime; |
5206 | 5224 | ||
5207 | /* | 5225 | /* |
5208 | * Use CFS's precise accounting. (we subtract utime from | 5226 | * Use CFS's precise accounting. (we subtract utime from |
5209 | * the total, to make sure the total observed by userspace | 5227 | * the total, to make sure the total observed by userspace |
5210 | * grows monotonically - apps rely on that): | 5228 | * grows monotonically - apps rely on that): |
5211 | */ | 5229 | */ |
5212 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | 5230 | stime = nsecs_to_cputime(p->se.sum_exec_runtime) - task_utime(p); |
5213 | cputime_to_clock_t(task_utime(p)); | ||
5214 | 5231 | ||
5215 | if (stime >= 0) | 5232 | if (stime >= 0) |
5216 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | 5233 | p->prev_stime = max(p->prev_stime, stime); |
5217 | 5234 | ||
5218 | return p->prev_stime; | 5235 | return p->prev_stime; |
5219 | } | 5236 | } |
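The split itself is unchanged; what changes is the unit the arithmetic is done in. Roughly:

        utime = nsecs_to_cputime(p->se.sum_exec_runtime) * p->utime / (p->utime + p->stime)
        stime = nsecs_to_cputime(p->se.sum_exec_runtime) - task_utime(p)

both clamped to be monotonic through p->prev_utime/p->prev_stime. Working directly in cputime_t removes the cputime_to_clock_t()/clock_t_to_cputime() round trips of the old code and the precision loss that came with them.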
@@ -6182,22 +6199,14 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
6182 | BUG_ON(p->se.on_rq); | 6199 | BUG_ON(p->se.on_rq); |
6183 | 6200 | ||
6184 | p->policy = policy; | 6201 | p->policy = policy; |
6185 | switch (p->policy) { | ||
6186 | case SCHED_NORMAL: | ||
6187 | case SCHED_BATCH: | ||
6188 | case SCHED_IDLE: | ||
6189 | p->sched_class = &fair_sched_class; | ||
6190 | break; | ||
6191 | case SCHED_FIFO: | ||
6192 | case SCHED_RR: | ||
6193 | p->sched_class = &rt_sched_class; | ||
6194 | break; | ||
6195 | } | ||
6196 | |||
6197 | p->rt_priority = prio; | 6202 | p->rt_priority = prio; |
6198 | p->normal_prio = normal_prio(p); | 6203 | p->normal_prio = normal_prio(p); |
6199 | /* we are holding p->pi_lock already */ | 6204 | /* we are holding p->pi_lock already */ |
6200 | p->prio = rt_mutex_getprio(p); | 6205 | p->prio = rt_mutex_getprio(p); |
6206 | if (rt_prio(p->prio)) | ||
6207 | p->sched_class = &rt_sched_class; | ||
6208 | else | ||
6209 | p->sched_class = &fair_sched_class; | ||
6201 | set_load_weight(p); | 6210 | set_load_weight(p); |
6202 | } | 6211 | } |
6203 | 6212 | ||
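Picking the class from the effective priority instead of the requested policy matters once priority inheritance is involved: p->prio comes from rt_mutex_getprio(), so a boosted task stays in the RT class even if its policy is SCHED_NORMAL. rt_prio() is just a range check, roughly:

        static inline int rt_prio(int prio)
        {
                if (unlikely(prio < MAX_RT_PRIO))
                        return 1;
                return 0;
        }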
@@ -6942,7 +6951,7 @@ void show_state_filter(unsigned long state_filter) | |||
6942 | /* | 6951 | /* |
6943 | * Only show locks if all tasks are dumped: | 6952 | * Only show locks if all tasks are dumped: |
6944 | */ | 6953 | */ |
6945 | if (state_filter == -1) | 6954 | if (!state_filter) |
6946 | debug_show_all_locks(); | 6955 | debug_show_all_locks(); |
6947 | } | 6956 | } |
6948 | 6957 | ||
@@ -7747,6 +7756,16 @@ early_initcall(migration_init); | |||
7747 | 7756 | ||
7748 | #ifdef CONFIG_SCHED_DEBUG | 7757 | #ifdef CONFIG_SCHED_DEBUG |
7749 | 7758 | ||
7759 | static __read_mostly int sched_domain_debug_enabled; | ||
7760 | |||
7761 | static int __init sched_domain_debug_setup(char *str) | ||
7762 | { | ||
7763 | sched_domain_debug_enabled = 1; | ||
7764 | |||
7765 | return 0; | ||
7766 | } | ||
7767 | early_param("sched_debug", sched_domain_debug_setup); | ||
7768 | |||
7750 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | 7769 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
7751 | struct cpumask *groupmask) | 7770 | struct cpumask *groupmask) |
7752 | { | 7771 | { |
@@ -7833,6 +7852,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
7833 | cpumask_var_t groupmask; | 7852 | cpumask_var_t groupmask; |
7834 | int level = 0; | 7853 | int level = 0; |
7835 | 7854 | ||
7855 | if (!sched_domain_debug_enabled) | ||
7856 | return; | ||
7857 | |||
7836 | if (!sd) { | 7858 | if (!sd) { |
7837 | printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); | 7859 | printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); |
7838 | return; | 7860 | return; |
@@ -8890,7 +8912,7 @@ static int build_sched_domains(const struct cpumask *cpu_map) | |||
8890 | return __build_sched_domains(cpu_map, NULL); | 8912 | return __build_sched_domains(cpu_map, NULL); |
8891 | } | 8913 | } |
8892 | 8914 | ||
8893 | static struct cpumask *doms_cur; /* current sched domains */ | 8915 | static cpumask_var_t *doms_cur; /* current sched domains */ |
8894 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | 8916 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ |
8895 | static struct sched_domain_attr *dattr_cur; | 8917 | static struct sched_domain_attr *dattr_cur; |
8896 | /* attribues of custom domains in 'doms_cur' */ | 8918 | /* attribues of custom domains in 'doms_cur' */ |
@@ -8912,6 +8934,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void) | |||
8912 | return 0; | 8934 | return 0; |
8913 | } | 8935 | } |
8914 | 8936 | ||
8937 | cpumask_var_t *alloc_sched_domains(unsigned int ndoms) | ||
8938 | { | ||
8939 | int i; | ||
8940 | cpumask_var_t *doms; | ||
8941 | |||
8942 | doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL); | ||
8943 | if (!doms) | ||
8944 | return NULL; | ||
8945 | for (i = 0; i < ndoms; i++) { | ||
8946 | if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) { | ||
8947 | free_sched_domains(doms, i); | ||
8948 | return NULL; | ||
8949 | } | ||
8950 | } | ||
8951 | return doms; | ||
8952 | } | ||
8953 | |||
8954 | void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) | ||
8955 | { | ||
8956 | unsigned int i; | ||
8957 | for (i = 0; i < ndoms; i++) | ||
8958 | free_cpumask_var(doms[i]); | ||
8959 | kfree(doms); | ||
8960 | } | ||
8961 | |||
8915 | /* | 8962 | /* |
8916 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 8963 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
8917 | * For now this just excludes isolated cpus, but could be used to | 8964 | * For now this just excludes isolated cpus, but could be used to |
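A sketch of the intended calling convention (the caller and the single all-online domain are hypothetical; cpuset.c above is the real user): the array and every per-domain mask come from alloc_sched_domains(), and partition_sched_domains() takes ownership and later releases the set with free_sched_domains():

        /* caller must hold the hotplug lock (get_online_cpus()) */
        cpumask_var_t *doms = alloc_sched_domains(1);

        if (doms) {
                /* one sched domain spanning all online CPUs */
                cpumask_copy(doms[0], cpu_online_mask);
                partition_sched_domains(1, doms, NULL); /* scheduler now owns doms */
        }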
@@ -8923,12 +8970,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) | |||
8923 | 8970 | ||
8924 | arch_update_cpu_topology(); | 8971 | arch_update_cpu_topology(); |
8925 | ndoms_cur = 1; | 8972 | ndoms_cur = 1; |
8926 | doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); | 8973 | doms_cur = alloc_sched_domains(ndoms_cur); |
8927 | if (!doms_cur) | 8974 | if (!doms_cur) |
8928 | doms_cur = fallback_doms; | 8975 | doms_cur = &fallback_doms; |
8929 | cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); | 8976 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); |
8930 | dattr_cur = NULL; | 8977 | dattr_cur = NULL; |
8931 | err = build_sched_domains(doms_cur); | 8978 | err = build_sched_domains(doms_cur[0]); |
8932 | register_sched_domain_sysctl(); | 8979 | register_sched_domain_sysctl(); |
8933 | 8980 | ||
8934 | return err; | 8981 | return err; |
@@ -8978,19 +9025,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | |||
8978 | * doms_new[] to the current sched domain partitioning, doms_cur[]. | 9025 | * doms_new[] to the current sched domain partitioning, doms_cur[]. |
8979 | * It destroys each deleted domain and builds each new domain. | 9026 | * It destroys each deleted domain and builds each new domain. |
8980 | * | 9027 | * |
8981 | * 'doms_new' is an array of cpumask's of length 'ndoms_new'. | 9028 | * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. |
8982 | * The masks don't intersect (don't overlap.) We should setup one | 9029 | * The masks don't intersect (don't overlap.) We should setup one |
8983 | * sched domain for each mask. CPUs not in any of the cpumasks will | 9030 | * sched domain for each mask. CPUs not in any of the cpumasks will |
8984 | * not be load balanced. If the same cpumask appears both in the | 9031 | * not be load balanced. If the same cpumask appears both in the |
8985 | * current 'doms_cur' domains and in the new 'doms_new', we can leave | 9032 | * current 'doms_cur' domains and in the new 'doms_new', we can leave |
8986 | * it as it is. | 9033 | * it as it is. |
8987 | * | 9034 | * |
8988 | * The passed in 'doms_new' should be kmalloc'd. This routine takes | 9035 | * The passed in 'doms_new' should be allocated using |
8989 | * ownership of it and will kfree it when done with it. If the caller | 9036 | * alloc_sched_domains. This routine takes ownership of it and will |
8990 | * failed the kmalloc call, then it can pass in doms_new == NULL && | 9037 | * free_sched_domains it when done with it. If the caller failed the |
8991 | * ndoms_new == 1, and partition_sched_domains() will fallback to | 9038 | * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, |
8992 | * the single partition 'fallback_doms', it also forces the domains | 9039 | * and partition_sched_domains() will fallback to the single partition |
8993 | * to be rebuilt. | 9040 | * 'fallback_doms', it also forces the domains to be rebuilt. |
8994 | * | 9041 | * |
8995 | * If doms_new == NULL it will be replaced with cpu_online_mask. | 9042 | * If doms_new == NULL it will be replaced with cpu_online_mask. |
8996 | * ndoms_new == 0 is a special case for destroying existing domains, | 9043 | * ndoms_new == 0 is a special case for destroying existing domains, |
@@ -8998,8 +9045,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | |||
8998 | * | 9045 | * |
8999 | * Call with hotplug lock held | 9046 | * Call with hotplug lock held |
9000 | */ | 9047 | */ |
9001 | /* FIXME: Change to struct cpumask *doms_new[] */ | 9048 | void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
9002 | void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | ||
9003 | struct sched_domain_attr *dattr_new) | 9049 | struct sched_domain_attr *dattr_new) |
9004 | { | 9050 | { |
9005 | int i, j, n; | 9051 | int i, j, n; |
@@ -9018,40 +9064,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | |||
9018 | /* Destroy deleted domains */ | 9064 | /* Destroy deleted domains */ |
9019 | for (i = 0; i < ndoms_cur; i++) { | 9065 | for (i = 0; i < ndoms_cur; i++) { |
9020 | for (j = 0; j < n && !new_topology; j++) { | 9066 | for (j = 0; j < n && !new_topology; j++) { |
9021 | if (cpumask_equal(&doms_cur[i], &doms_new[j]) | 9067 | if (cpumask_equal(doms_cur[i], doms_new[j]) |
9022 | && dattrs_equal(dattr_cur, i, dattr_new, j)) | 9068 | && dattrs_equal(dattr_cur, i, dattr_new, j)) |
9023 | goto match1; | 9069 | goto match1; |
9024 | } | 9070 | } |
9025 | /* no match - a current sched domain not in new doms_new[] */ | 9071 | /* no match - a current sched domain not in new doms_new[] */ |
9026 | detach_destroy_domains(doms_cur + i); | 9072 | detach_destroy_domains(doms_cur[i]); |
9027 | match1: | 9073 | match1: |
9028 | ; | 9074 | ; |
9029 | } | 9075 | } |
9030 | 9076 | ||
9031 | if (doms_new == NULL) { | 9077 | if (doms_new == NULL) { |
9032 | ndoms_cur = 0; | 9078 | ndoms_cur = 0; |
9033 | doms_new = fallback_doms; | 9079 | doms_new = &fallback_doms; |
9034 | cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); | 9080 | cpumask_andnot(doms_new[0], cpu_online_mask, cpu_isolated_map); |
9035 | WARN_ON_ONCE(dattr_new); | 9081 | WARN_ON_ONCE(dattr_new); |
9036 | } | 9082 | } |
9037 | 9083 | ||
9038 | /* Build new domains */ | 9084 | /* Build new domains */ |
9039 | for (i = 0; i < ndoms_new; i++) { | 9085 | for (i = 0; i < ndoms_new; i++) { |
9040 | for (j = 0; j < ndoms_cur && !new_topology; j++) { | 9086 | for (j = 0; j < ndoms_cur && !new_topology; j++) { |
9041 | if (cpumask_equal(&doms_new[i], &doms_cur[j]) | 9087 | if (cpumask_equal(doms_new[i], doms_cur[j]) |
9042 | && dattrs_equal(dattr_new, i, dattr_cur, j)) | 9088 | && dattrs_equal(dattr_new, i, dattr_cur, j)) |
9043 | goto match2; | 9089 | goto match2; |
9044 | } | 9090 | } |
9045 | /* no match - add a new doms_new */ | 9091 | /* no match - add a new doms_new */ |
9046 | __build_sched_domains(doms_new + i, | 9092 | __build_sched_domains(doms_new[i], |
9047 | dattr_new ? dattr_new + i : NULL); | 9093 | dattr_new ? dattr_new + i : NULL); |
9048 | match2: | 9094 | match2: |
9049 | ; | 9095 | ; |
9050 | } | 9096 | } |
9051 | 9097 | ||
9052 | /* Remember the new sched domains */ | 9098 | /* Remember the new sched domains */ |
9053 | if (doms_cur != fallback_doms) | 9099 | if (doms_cur != &fallback_doms) |
9054 | kfree(doms_cur); | 9100 | free_sched_domains(doms_cur, ndoms_cur); |
9055 | kfree(dattr_cur); /* kfree(NULL) is safe */ | 9101 | kfree(dattr_cur); /* kfree(NULL) is safe */ |
9056 | doms_cur = doms_new; | 9102 | doms_cur = doms_new; |
9057 | dattr_cur = dattr_new; | 9103 | dattr_cur = dattr_new; |
@@ -9373,10 +9419,6 @@ void __init sched_init(void) | |||
9373 | #ifdef CONFIG_CPUMASK_OFFSTACK | 9419 | #ifdef CONFIG_CPUMASK_OFFSTACK |
9374 | alloc_size += num_possible_cpus() * cpumask_size(); | 9420 | alloc_size += num_possible_cpus() * cpumask_size(); |
9375 | #endif | 9421 | #endif |
9376 | /* | ||
9377 | * As sched_init() is called before page_alloc is setup, | ||
9378 | * we use alloc_bootmem(). | ||
9379 | */ | ||
9380 | if (alloc_size) { | 9422 | if (alloc_size) { |
9381 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); | 9423 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); |
9382 | 9424 | ||
@@ -9531,6 +9573,8 @@ void __init sched_init(void) | |||
9531 | rq->cpu = i; | 9573 | rq->cpu = i; |
9532 | rq->online = 0; | 9574 | rq->online = 0; |
9533 | rq->migration_thread = NULL; | 9575 | rq->migration_thread = NULL; |
9576 | rq->idle_stamp = 0; | ||
9577 | rq->avg_idle = 2*sysctl_sched_migration_cost; | ||
9534 | INIT_LIST_HEAD(&rq->migration_queue); | 9578 | INIT_LIST_HEAD(&rq->migration_queue); |
9535 | rq_attach_root(rq, &def_root_domain); | 9579 | rq_attach_root(rq, &def_root_domain); |
9536 | #endif | 9580 | #endif |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index efb84409bc43..6988cf08f705 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -285,12 +285,16 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
285 | 285 | ||
286 | #ifdef CONFIG_SCHEDSTATS | 286 | #ifdef CONFIG_SCHEDSTATS |
287 | #define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n); | 287 | #define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n); |
288 | #define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n); | ||
288 | 289 | ||
289 | P(yld_count); | 290 | P(yld_count); |
290 | 291 | ||
291 | P(sched_switch); | 292 | P(sched_switch); |
292 | P(sched_count); | 293 | P(sched_count); |
293 | P(sched_goidle); | 294 | P(sched_goidle); |
295 | #ifdef CONFIG_SMP | ||
296 | P64(avg_idle); | ||
297 | #endif | ||
294 | 298 | ||
295 | P(ttwu_count); | 299 | P(ttwu_count); |
296 | P(ttwu_local); | 300 | P(ttwu_local); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 37087a7fac22..f61837ad336d 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1345,6 +1345,37 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | |||
1345 | } | 1345 | } |
1346 | 1346 | ||
1347 | /* | 1347 | /* |
1348 | * Try and locate an idle CPU in the sched_domain. | ||
1349 | */ | ||
1350 | static int | ||
1351 | select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target) | ||
1352 | { | ||
1353 | int cpu = smp_processor_id(); | ||
1354 | int prev_cpu = task_cpu(p); | ||
1355 | int i; | ||
1356 | |||
1357 | /* | ||
1358 | * If this domain spans both cpu and prev_cpu (see the SD_WAKE_AFFINE | ||
1359 | * test in select_task_rq_fair) and the prev_cpu is idle then that's | ||
1360 | * always a better target than the current cpu. | ||
1361 | */ | ||
1362 | if (target == cpu && !cpu_rq(prev_cpu)->cfs.nr_running) | ||
1363 | return prev_cpu; | ||
1364 | |||
1365 | /* | ||
1366 | * Otherwise, iterate the domain and find an elegible idle cpu. | ||
1367 | */ | ||
1368 | for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) { | ||
1369 | if (!cpu_rq(i)->cfs.nr_running) { | ||
1370 | target = i; | ||
1371 | break; | ||
1372 | } | ||
1373 | } | ||
1374 | |||
1375 | return target; | ||
1376 | } | ||
1377 | |||
1378 | /* | ||
1348 | * sched_balance_self: balance the current task (running on cpu) in domains | 1379 | * sched_balance_self: balance the current task (running on cpu) in domains |
1349 | * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and | 1380 | * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and |
1350 | * SD_BALANCE_EXEC. | 1381 | * SD_BALANCE_EXEC. |
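In plain terms: when the affine target is the current CPU but the task's previous CPU is idle, prefer the previous CPU (its cache is presumably still warm); otherwise scan this domain for any idle CPU the task is allowed to run on; if none is found, leave the target as it was. Combined with the SD_PREFER_SIBLING check added to select_task_rq_fair() below, this steers wakeups toward idle CPUs in cache-sharing domains instead of stacking them on the waking CPU.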
@@ -1398,11 +1429,35 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag | |||
1398 | want_sd = 0; | 1429 | want_sd = 0; |
1399 | } | 1430 | } |
1400 | 1431 | ||
1401 | if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && | 1432 | /* |
1402 | cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { | 1433 | * While iterating the domains looking for a spanning |
1434 | * WAKE_AFFINE domain, adjust the affine target to any idle cpu | ||
1435 | * in cache sharing domains along the way. | ||
1436 | */ | ||
1437 | if (want_affine) { | ||
1438 | int target = -1; | ||
1403 | 1439 | ||
1404 | affine_sd = tmp; | 1440 | /* |
1405 | want_affine = 0; | 1441 | * If both cpu and prev_cpu are part of this domain, |
1442 | * cpu is a valid SD_WAKE_AFFINE target. | ||
1443 | */ | ||
1444 | if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) | ||
1445 | target = cpu; | ||
1446 | |||
1447 | /* | ||
1448 | * If there's an idle sibling in this domain, make that | ||
1449 | * the wake_affine target instead of the current cpu. | ||
1450 | */ | ||
1451 | if (tmp->flags & SD_PREFER_SIBLING) | ||
1452 | target = select_idle_sibling(p, tmp, target); | ||
1453 | |||
1454 | if (target >= 0) { | ||
1455 | if (tmp->flags & SD_WAKE_AFFINE) { | ||
1456 | affine_sd = tmp; | ||
1457 | want_affine = 0; | ||
1458 | } | ||
1459 | cpu = target; | ||
1460 | } | ||
1406 | } | 1461 | } |
1407 | 1462 | ||
1408 | if (!want_sd && !want_affine) | 1463 | if (!want_sd && !want_affine) |
@@ -1679,7 +1734,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) | |||
1679 | struct cfs_rq *cfs_rq = &rq->cfs; | 1734 | struct cfs_rq *cfs_rq = &rq->cfs; |
1680 | struct sched_entity *se; | 1735 | struct sched_entity *se; |
1681 | 1736 | ||
1682 | if (unlikely(!cfs_rq->nr_running)) | 1737 | if (!cfs_rq->nr_running) |
1683 | return NULL; | 1738 | return NULL; |
1684 | 1739 | ||
1685 | do { | 1740 | do { |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index a4d790cddb19..5c5fef378415 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1153,29 +1153,12 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) | |||
1153 | 1153 | ||
1154 | static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); | 1154 | static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); |
1155 | 1155 | ||
1156 | static inline int pick_optimal_cpu(int this_cpu, | ||
1157 | const struct cpumask *mask) | ||
1158 | { | ||
1159 | int first; | ||
1160 | |||
1161 | /* "this_cpu" is cheaper to preempt than a remote processor */ | ||
1162 | if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask)) | ||
1163 | return this_cpu; | ||
1164 | |||
1165 | first = cpumask_first(mask); | ||
1166 | if (first < nr_cpu_ids) | ||
1167 | return first; | ||
1168 | |||
1169 | return -1; | ||
1170 | } | ||
1171 | |||
1172 | static int find_lowest_rq(struct task_struct *task) | 1156 | static int find_lowest_rq(struct task_struct *task) |
1173 | { | 1157 | { |
1174 | struct sched_domain *sd; | 1158 | struct sched_domain *sd; |
1175 | struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); | 1159 | struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); |
1176 | int this_cpu = smp_processor_id(); | 1160 | int this_cpu = smp_processor_id(); |
1177 | int cpu = task_cpu(task); | 1161 | int cpu = task_cpu(task); |
1178 | cpumask_var_t domain_mask; | ||
1179 | 1162 | ||
1180 | if (task->rt.nr_cpus_allowed == 1) | 1163 | if (task->rt.nr_cpus_allowed == 1) |
1181 | return -1; /* No other targets possible */ | 1164 | return -1; /* No other targets possible */ |
@@ -1198,28 +1181,26 @@ static int find_lowest_rq(struct task_struct *task) | |||
1198 | * Otherwise, we consult the sched_domains span maps to figure | 1181 | * Otherwise, we consult the sched_domains span maps to figure |
1199 | * out which cpu is logically closest to our hot cache data. | 1182 | * out which cpu is logically closest to our hot cache data. |
1200 | */ | 1183 | */ |
1201 | if (this_cpu == cpu) | 1184 | if (!cpumask_test_cpu(this_cpu, lowest_mask)) |
1202 | this_cpu = -1; /* Skip this_cpu opt if the same */ | 1185 | this_cpu = -1; /* Skip this_cpu opt if not among lowest */ |
1203 | |||
1204 | if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) { | ||
1205 | for_each_domain(cpu, sd) { | ||
1206 | if (sd->flags & SD_WAKE_AFFINE) { | ||
1207 | int best_cpu; | ||
1208 | 1186 | ||
1209 | cpumask_and(domain_mask, | 1187 | for_each_domain(cpu, sd) { |
1210 | sched_domain_span(sd), | 1188 | if (sd->flags & SD_WAKE_AFFINE) { |
1211 | lowest_mask); | 1189 | int best_cpu; |
1212 | 1190 | ||
1213 | best_cpu = pick_optimal_cpu(this_cpu, | 1191 | /* |
1214 | domain_mask); | 1192 | * "this_cpu" is cheaper to preempt than a |
1215 | 1193 | * remote processor. | |
1216 | if (best_cpu != -1) { | 1194 | */ |
1217 | free_cpumask_var(domain_mask); | 1195 | if (this_cpu != -1 && |
1218 | return best_cpu; | 1196 | cpumask_test_cpu(this_cpu, sched_domain_span(sd))) |
1219 | } | 1197 | return this_cpu; |
1220 | } | 1198 | |
1199 | best_cpu = cpumask_first_and(lowest_mask, | ||
1200 | sched_domain_span(sd)); | ||
1201 | if (best_cpu < nr_cpu_ids) | ||
1202 | return best_cpu; | ||
1221 | } | 1203 | } |
1222 | free_cpumask_var(domain_mask); | ||
1223 | } | 1204 | } |
1224 | 1205 | ||
1225 | /* | 1206 | /* |
@@ -1227,7 +1208,13 @@ static int find_lowest_rq(struct task_struct *task) | |||
1227 | * just give the caller *something* to work with from the compatible | 1208 | * just give the caller *something* to work with from the compatible |
1228 | * locations. | 1209 | * locations. |
1229 | */ | 1210 | */ |
1230 | return pick_optimal_cpu(this_cpu, lowest_mask); | 1211 | if (this_cpu != -1) |
1212 | return this_cpu; | ||
1213 | |||
1214 | cpu = cpumask_any(lowest_mask); | ||
1215 | if (cpu < nr_cpu_ids) | ||
1216 | return cpu; | ||
1217 | return -1; | ||
1231 | } | 1218 | } |
1232 | 1219 | ||
1233 | /* Will lock the rq it finds */ | 1220 | /* Will lock the rq it finds */ |
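The rewrite keeps pick_optimal_cpu()'s preference order — this_cpu first because it is cheapest to preempt, then the closest CPU found by walking the SD_WAKE_AFFINE domains, then anything left in lowest_mask — but it now operates on lowest_mask directly with cpumask_test_cpu()/cpumask_first_and()/cpumask_any(), so the GFP_ATOMIC allocation of a temporary domain_mask (which could fail under memory pressure and was pure overhead in this hot path) goes away. The this_cpu shortcut is also gated up front on this_cpu actually being in lowest_mask, replacing the old "skip if this_cpu == cpu" special case.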