author     Linus Torvalds <torvalds@woody.linux-foundation.org>   2007-07-26 16:59:59 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>   2007-07-26 16:59:59 -0400
commit     257f49251c802c67030c11f63cee4ed7b50f6639 (patch)
tree       99fcefa267be1b2ae63b48bf2094157732ecece1
parent     293a032eb95f3c6c212c1541e94c14b111731313 (diff)
parent     e692ab53473c93c0d0820618c97aa74a62ab67da (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched
* git://git.kernel.org/pub/scm/linux/kernel/git/mingo/linux-2.6-sched:
[PATCH] sched: debug feature - make the sched-domains tree runtime-tweakable
[PATCH] sched: add above_background_load() function
[PATCH] sched: update Documentation/sched-stats.txt
[PATCH] sched: mark sysrq_sched_debug_show() static
[PATCH] sched: make cpu_clock() not use the rq clock
[PATCH] sched: remove unused rq->load_balance_class
[PATCH] sched: arch preempt notifier mechanism
[PATCH] sched: increase SCHED_LOAD_SCALE_FUZZ
-rw-r--r--  Documentation/sched-stats.txt  | 195
-rw-r--r--  include/linux/preempt.h        |  44
-rw-r--r--  include/linux/sched.h          |  23
-rw-r--r--  kernel/Kconfig.preempt         |   3
-rw-r--r--  kernel/sched.c                 | 204
-rw-r--r--  kernel/sched_debug.c           |   2
6 files changed, 365 insertions(+), 106 deletions(-)
diff --git a/Documentation/sched-stats.txt b/Documentation/sched-stats.txt
index 6f72021aae51..442e14d35dea 100644
--- a/Documentation/sched-stats.txt
+++ b/Documentation/sched-stats.txt
@@ -1,10 +1,11 @@
-Version 10 of schedstats includes support for sched_domains, which
-hit the mainline kernel in 2.6.7. Some counters make more sense to be
-per-runqueue; other to be per-domain. Note that domains (and their associated
-information) will only be pertinent and available on machines utilizing
-CONFIG_SMP.
-
-In version 10 of schedstat, there is at least one level of domain
+Version 14 of schedstats includes support for sched_domains, which hit the
+mainline kernel in 2.6.20 although it is identical to the stats from version
+12 which was in the kernel from 2.6.13-2.6.19 (version 13 never saw a kernel
+release). Some counters make more sense to be per-runqueue; other to be
+per-domain. Note that domains (and their associated information) will only
+be pertinent and available on machines utilizing CONFIG_SMP.
+
+In version 14 of schedstat, there is at least one level of domain
 statistics for each cpu listed, and there may well be more than one
 domain. Domains have no particular names in this implementation, but
 the highest numbered one typically arbitrates balancing across all the
@@ -27,7 +28,7 @@ to write their own scripts, the fields are described here.
 
 CPU statistics
 --------------
-cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
+cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12
 
 NOTE: In the sched_yield() statistics, the active queue is considered empty
 if it has only one process in it, since obviously the process calling
@@ -39,48 +40,20 @@ First four fields are sched_yield() statistics:
 3) # of times just the expired queue was empty
 4) # of times sched_yield() was called
 
-Next four are schedule() statistics:
-5) # of times the active queue had at least one other process on it
-6) # of times we switched to the expired queue and reused it
-7) # of times schedule() was called
-8) # of times schedule() left the processor idle
-
-Next four are active_load_balance() statistics:
-9) # of times active_load_balance() was called
-10) # of times active_load_balance() caused this cpu to gain a task
-11) # of times active_load_balance() caused this cpu to lose a task
-12) # of times active_load_balance() tried to move a task and failed
-
-Next three are try_to_wake_up() statistics:
-13) # of times try_to_wake_up() was called
-14) # of times try_to_wake_up() successfully moved the awakening task
-15) # of times try_to_wake_up() attempted to move the awakening task
-
-Next two are wake_up_new_task() statistics:
-16) # of times wake_up_new_task() was called
-17) # of times wake_up_new_task() successfully moved the new task
-
-Next one is a sched_migrate_task() statistic:
-18) # of times sched_migrate_task() was called
+Next three are schedule() statistics:
+5) # of times we switched to the expired queue and reused it
+6) # of times schedule() was called
+7) # of times schedule() left the processor idle
 
-Next one is a sched_balance_exec() statistic:
-19) # of times sched_balance_exec() was called
+Next two are try_to_wake_up() statistics:
+8) # of times try_to_wake_up() was called
+9) # of times try_to_wake_up() was called to wake up the local cpu
 
 Next three are statistics describing scheduling latency:
-20) sum of all time spent running by tasks on this processor (in ms)
-21) sum of all time spent waiting to run by tasks on this processor (in ms)
-22) # of tasks (not necessarily unique) given to the processor
-
-The last six are statistics dealing with pull_task():
-23) # of times pull_task() moved a task to this cpu when newly idle
-24) # of times pull_task() stole a task from this cpu when another cpu
-was newly idle
-25) # of times pull_task() moved a task to this cpu when idle
-26) # of times pull_task() stole a task from this cpu when another cpu
-was idle
-27) # of times pull_task() moved a task to this cpu when busy
-28) # of times pull_task() stole a task from this cpu when another cpu
-was busy
+10) sum of all time spent running by tasks on this processor (in jiffies)
+11) sum of all time spent waiting to run by tasks on this processor (in
+jiffies)
+12) # of timeslices run on this cpu
 
 
 Domain statistics
@@ -89,65 +62,95 @@ One of these is produced per domain for each cpu described. (Note that if
 CONFIG_SMP is not defined, *no* domains are utilized and these lines
 will not appear in the output.)
 
-domain<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
 
 The first field is a bit mask indicating what cpus this domain operates over.
 
-The next fifteen are a variety of load_balance() statistics:
-
-1) # of times in this domain load_balance() was called when the cpu
-was idle
-2) # of times in this domain load_balance() was called when the cpu
-was busy
-3) # of times in this domain load_balance() was called when the cpu
-was just becoming idle
-4) # of times in this domain load_balance() tried to move one or more
-tasks and failed, when the cpu was idle
-5) # of times in this domain load_balance() tried to move one or more
-tasks and failed, when the cpu was busy
-6) # of times in this domain load_balance() tried to move one or more
-tasks and failed, when the cpu was just becoming idle
-7) sum of imbalances discovered (if any) with each call to
-load_balance() in this domain when the cpu was idle
-8) sum of imbalances discovered (if any) with each call to
-load_balance() in this domain when the cpu was busy
-9) sum of imbalances discovered (if any) with each call to
-load_balance() in this domain when the cpu was just becoming idle
-10) # of times in this domain load_balance() was called but did not find
-a busier queue while the cpu was idle
-11) # of times in this domain load_balance() was called but did not find
-a busier queue while the cpu was busy
-12) # of times in this domain load_balance() was called but did not find
-a busier queue while the cpu was just becoming idle
-13) # of times in this domain a busier queue was found while the cpu was
-idle but no busier group was found
-14) # of times in this domain a busier queue was found while the cpu was
-busy but no busier group was found
-15) # of times in this domain a busier queue was found while the cpu was
-just becoming idle but no busier group was found
-
-Next two are sched_balance_exec() statistics:
-17) # of times in this domain sched_balance_exec() successfully pushed
-a task to a new cpu
-18) # of times in this domain sched_balance_exec() tried but failed to
-push a task to a new cpu
-
-Next two are try_to_wake_up() statistics:
-19) # of times in this domain try_to_wake_up() tried to move a task based
-on affinity and cache warmth
-20) # of times in this domain try_to_wake_up() tried to move a task based
-on load balancing
-
+The next 24 are a variety of load_balance() statistics in grouped into types
+of idleness (idle, busy, and newly idle):
+
+1) # of times in this domain load_balance() was called when the
+cpu was idle
+2) # of times in this domain load_balance() checked but found
+the load did not require balancing when the cpu was idle
+3) # of times in this domain load_balance() tried to move one or
+more tasks and failed, when the cpu was idle
+4) sum of imbalances discovered (if any) with each call to
+load_balance() in this domain when the cpu was idle
+5) # of times in this domain pull_task() was called when the cpu
+was idle
+6) # of times in this domain pull_task() was called even though
+the target task was cache-hot when idle
+7) # of times in this domain load_balance() was called but did
+not find a busier queue while the cpu was idle
+8) # of times in this domain a busier queue was found while the
+cpu was idle but no busier group was found
+
+9) # of times in this domain load_balance() was called when the
+cpu was busy
+10) # of times in this domain load_balance() checked but found the
+load did not require balancing when busy
+11) # of times in this domain load_balance() tried to move one or
+more tasks and failed, when the cpu was busy
+12) sum of imbalances discovered (if any) with each call to
+load_balance() in this domain when the cpu was busy
+13) # of times in this domain pull_task() was called when busy
+14) # of times in this domain pull_task() was called even though the
+target task was cache-hot when busy
+15) # of times in this domain load_balance() was called but did not
+find a busier queue while the cpu was busy
+16) # of times in this domain a busier queue was found while the cpu
+was busy but no busier group was found
+
+17) # of times in this domain load_balance() was called when the
+cpu was just becoming idle
+18) # of times in this domain load_balance() checked but found the
+load did not require balancing when the cpu was just becoming idle
+19) # of times in this domain load_balance() tried to move one or more
+tasks and failed, when the cpu was just becoming idle
+20) sum of imbalances discovered (if any) with each call to
+load_balance() in this domain when the cpu was just becoming idle
+21) # of times in this domain pull_task() was called when newly idle
+22) # of times in this domain pull_task() was called even though the
+target task was cache-hot when just becoming idle
+23) # of times in this domain load_balance() was called but did not
+find a busier queue while the cpu was just becoming idle
+24) # of times in this domain a busier queue was found while the cpu
+was just becoming idle but no busier group was found
+
+Next three are active_load_balance() statistics:
+25) # of times active_load_balance() was called
+26) # of times active_load_balance() tried to move a task and failed
+27) # of times active_load_balance() successfully moved a task
+
+Next three are sched_balance_exec() statistics:
+28) sbe_cnt is not used
+29) sbe_balanced is not used
+30) sbe_pushed is not used
+
+Next three are sched_balance_fork() statistics:
+31) sbf_cnt is not used
+32) sbf_balanced is not used
+33) sbf_pushed is not used
+
+Next three are try_to_wake_up() statistics:
+34) # of times in this domain try_to_wake_up() awoke a task that
+last ran on a different cpu in this domain
+35) # of times in this domain try_to_wake_up() moved a task to the
+waking cpu because it was cache-cold on its own cpu anyway
+36) # of times in this domain try_to_wake_up() started passive balancing
 
 /proc/<pid>/schedstat
 ----------------
 schedstats also adds a new /proc/<pid/schedstat file to include some of
 the same information on a per-process level. There are three fields in
-this file correlating to fields 20, 21, and 22 in the CPU fields, but
-they only apply for that process.
+this file correlating for that process to:
+      1) time spent on the cpu
+      2) time spent waiting on a runqueue
+      3) # of timeslices run on this cpu
 
 A program could be easily written to make use of these extra fields to
 report on how well a particular process or set of processes is faring
 under the scheduler's policies. A simple version of such a program is
 available at
-http://eaglet.rain.com/rick/linux/schedstat/v10/latency.c
+http://eaglet.rain.com/rick/linux/schedstat/v12/latency.c
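
The updated document boils the per-process file down to three fields (cpu time, runqueue wait time, timeslice count). As a quick illustration -- not part of this patch, and assuming only the three-field /proc/<pid>/schedstat format described above -- a user-space reader could look like this:

/* Illustrative reader for the three /proc/<pid>/schedstat fields above. */
#include <stdio.h>

int main(int argc, char **argv)
{
	char path[64];
	unsigned long long run, wait, slices;
	FILE *f;

	snprintf(path, sizeof(path), "/proc/%s/schedstat",
		 argc > 1 ? argv[1] : "self");
	f = fopen(path, "r");
	if (!f || fscanf(f, "%llu %llu %llu", &run, &wait, &slices) != 3) {
		perror(path);
		return 1;
	}
	fclose(f);

	printf("run time:   %llu\n", run);    /* field 1: time spent on the cpu */
	printf("wait time:  %llu\n", wait);   /* field 2: time spent waiting on a runqueue */
	printf("timeslices: %llu\n", slices); /* field 3: # of timeslices run on this cpu */
	return 0;
}

The latency.c program linked at the end of the document is a fuller version of the same idea.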
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index d0926d63406c..484988ed301e 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -8,6 +8,7 @@
 
 #include <linux/thread_info.h>
 #include <linux/linkage.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
 extern void fastcall add_preempt_count(int val);
@@ -60,4 +61,47 @@ do { \
 
 #endif
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+struct preempt_notifier;
+
+/**
+ * preempt_ops - notifiers called when a task is preempted and rescheduled
+ * @sched_in: we're about to be rescheduled:
+ *    notifier: struct preempt_notifier for the task being scheduled
+ *    cpu:  cpu we're scheduled on
+ * @sched_out: we've just been preempted
+ *    notifier: struct preempt_notifier for the task being preempted
+ *    next: the task that's kicking us out
+ */
+struct preempt_ops {
+	void (*sched_in)(struct preempt_notifier *notifier, int cpu);
+	void (*sched_out)(struct preempt_notifier *notifier,
+			  struct task_struct *next);
+};
+
+/**
+ * preempt_notifier - key for installing preemption notifiers
+ * @link: internal use
+ * @ops: defines the notifier functions to be called
+ *
+ * Usually used in conjunction with container_of().
+ */
+struct preempt_notifier {
+	struct hlist_node link;
+	struct preempt_ops *ops;
+};
+
+void preempt_notifier_register(struct preempt_notifier *notifier);
+void preempt_notifier_unregister(struct preempt_notifier *notifier);
+
+static inline void preempt_notifier_init(struct preempt_notifier *notifier,
+					 struct preempt_ops *ops)
+{
+	INIT_HLIST_NODE(&notifier->link);
+	notifier->ops = ops;
+}
+
+#endif
+
 #endif /* __LINUX_PREEMPT_H */
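
The header only declares the hooks; to make the intended usage concrete, here is a minimal, hypothetical consumer (not part of this series, and only buildable once something selects CONFIG_PREEMPT_NOTIFIERS). All the demo_* names are invented for illustration; the callback signatures, preempt_notifier_init(), and the register/unregister calls come straight from the header above, and container_of() is used exactly as the kerneldoc comment suggests.

/* Hypothetical consumer of the preempt notifier API (illustration only). */
#include <linux/preempt.h>
#include <linux/sched.h>
#include <linux/kernel.h>

struct demo_ctx {
	struct preempt_notifier	notifier;
	unsigned long		nr_sched_in;
	unsigned long		nr_sched_out;
};

static void demo_sched_in(struct preempt_notifier *notifier, int cpu)
{
	/* current was just given 'cpu' back */
	struct demo_ctx *ctx = container_of(notifier, struct demo_ctx, notifier);

	ctx->nr_sched_in++;
}

static void demo_sched_out(struct preempt_notifier *notifier,
			   struct task_struct *next)
{
	/* current is being switched out in favour of 'next' */
	struct demo_ctx *ctx = container_of(notifier, struct demo_ctx, notifier);

	ctx->nr_sched_out++;
}

static struct preempt_ops demo_ops = {
	.sched_in	= demo_sched_in,
	.sched_out	= demo_sched_out,
};

/* Must run in the context of the task that wants the callbacks. */
static void demo_attach(struct demo_ctx *ctx)
{
	preempt_notifier_init(&ctx->notifier, &demo_ops);
	preempt_notifier_register(&ctx->notifier);
}

static void demo_detach(struct demo_ctx *ctx)
{
	preempt_notifier_unregister(&ctx->notifier);
}

Registration is per-task -- preempt_notifier_register() adds the notifier to current->preempt_notifiers in kernel/sched.c below -- so demo_attach() takes effect for whichever task calls it, and the caller is responsible for unregistering before the task goes away. The callbacks run from the scheduler's context-switch path, so they have to stay short and must not sleep.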
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 33b9b4841ee7..2e490271acf6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -681,7 +681,7 @@ enum cpu_idle_type {
 #define SCHED_LOAD_SHIFT	10
 #define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)
 
-#define SCHED_LOAD_SCALE_FUZZ	(SCHED_LOAD_SCALE >> 5)
+#define SCHED_LOAD_SCALE_FUZZ	(SCHED_LOAD_SCALE >> 1)
 
 #ifdef CONFIG_SMP
 #define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
@@ -786,6 +786,22 @@ extern int partition_sched_domains(cpumask_t *partition1,
 
 #endif	/* CONFIG_SMP */
 
+/*
+ * A runqueue laden with a single nice 0 task scores a weighted_cpuload of
+ * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a
+ * task of nice 0 or enough lower priority tasks to bring up the
+ * weighted_cpuload
+ */
+static inline int above_background_load(void)
+{
+	unsigned long cpu;
+
+	for_each_online_cpu(cpu) {
+		if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE)
+			return 1;
+	}
+	return 0;
+}
 
 struct io_context;	/* See blkdev.h */
 struct cpuset;
@@ -935,6 +951,11 @@ struct task_struct {
 	struct sched_class *sched_class;
 	struct sched_entity se;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	/* list of struct preempt_notifier: */
+	struct hlist_head preempt_notifiers;
+#endif
+
 	unsigned short ioprio;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
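
above_background_load() arrives in this merge without an in-tree caller; per its own comment it just reports whether any online cpu currently carries at least one nice-0 task's worth of weight (weighted_cpuload() >= SCHED_LOAD_SCALE, i.e. 1024 given SCHED_LOAD_SHIFT of 10). A hypothetical user -- the names below are invented for illustration -- would treat it as a cheap "is somebody real running?" test:

/* Illustrative only: skip optional housekeeping while the system is
 * above background load, i.e. some cpu's weighted_cpuload() is at
 * least SCHED_LOAD_SCALE.
 */
static void maybe_scan_pages(void)
{
	if (above_background_load())
		return;			/* real work is running; back off */

	scan_idle_pages();		/* hypothetical low-priority work */
}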
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index c64ce9c14207..6b066632e40c 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -63,3 +63,6 @@ config PREEMPT_BKL
 	  Say Y here if you are building a kernel for a desktop system.
 	  Say N if you are unsure.
 
+config PREEMPT_NOTIFIERS
+	bool
+
diff --git a/kernel/sched.c b/kernel/sched.c
index 93cf241cfbe9..5c51d7e5dcc1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -53,6 +53,7 @@
 #include <linux/percpu.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
+#include <linux/sysctl.h>
 #include <linux/syscalls.h>
 #include <linux/times.h>
 #include <linux/tsacct_kern.h>
@@ -263,8 +264,6 @@ struct rq {
 	unsigned int clock_warps, clock_overflows;
 	unsigned int clock_unstable_events;
 
-	struct sched_class *load_balance_class;
-
 	atomic_t nr_iowait;
 
 #ifdef CONFIG_SMP
@@ -385,13 +384,12 @@ static inline unsigned long long rq_clock(struct rq *rq)
  */
 unsigned long long cpu_clock(int cpu)
 {
-	struct rq *rq = cpu_rq(cpu);
 	unsigned long long now;
 	unsigned long flags;
 
-	spin_lock_irqsave(&rq->lock, flags);
-	now = rq_clock(rq);
-	spin_unlock_irqrestore(&rq->lock, flags);
+	local_irq_save(flags);
+	now = rq_clock(cpu_rq(cpu));
+	local_irq_restore(flags);
 
 	return now;
 }
@@ -1592,6 +1590,10 @@ static void __sched_fork(struct task_struct *p)
 	INIT_LIST_HEAD(&p->run_list);
 	p->se.on_rq = 0;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	INIT_HLIST_HEAD(&p->preempt_notifiers);
+#endif
+
 	/*
 	 * We mark the process as running here, but have not actually
 	 * inserted it onto the runqueue yet. This guarantees that
@@ -1673,6 +1675,63 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	task_rq_unlock(rq, &flags);
 }
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+/**
+ * preempt_notifier_register - tell me when current is being being preempted
+ * and rescheduled
+ */
+void preempt_notifier_register(struct preempt_notifier *notifier)
+{
+	hlist_add_head(&notifier->link, &current->preempt_notifiers);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_register);
+
+/**
+ * preempt_notifier_unregister - no longer interested in preemption notifications
+ *
+ * This is safe to call from within a preemption notifier.
+ */
+void preempt_notifier_unregister(struct preempt_notifier *notifier)
+{
+	hlist_del(&notifier->link);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+	struct preempt_notifier *notifier;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+		notifier->ops->sched_in(notifier, raw_smp_processor_id());
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+				 struct task_struct *next)
+{
+	struct preempt_notifier *notifier;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+		notifier->ops->sched_out(notifier, next);
+}
+
+#else
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+				 struct task_struct *next)
+{
+}
+
+#endif
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -1685,8 +1744,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
  * prepare_task_switch sets up locking and calls architecture specific
  * hooks.
  */
-static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
+static inline void
+prepare_task_switch(struct rq *rq, struct task_struct *prev,
+		    struct task_struct *next)
 {
+	fire_sched_out_preempt_notifiers(prev, next);
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -1728,6 +1790,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	prev_state = prev->state;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
+	fire_sched_in_preempt_notifiers(current);
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
@@ -1768,7 +1831,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 {
 	struct mm_struct *mm, *oldmm;
 
-	prepare_task_switch(rq, next);
+	prepare_task_switch(rq, prev, next);
 	mm = next->mm;
 	oldmm = prev->active_mm;
 	/*
@@ -5140,10 +5203,129 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 		if (!next)
 			break;
 		migrate_dead(dead_cpu, next);
+
 	}
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+
+static struct ctl_table sd_ctl_dir[] = {
+	{CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, },
+	{0,},
+};
+
+static struct ctl_table sd_ctl_root[] = {
+	{CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, },
+	{0,},
+};
+
+static struct ctl_table *sd_alloc_ctl_entry(int n)
+{
+	struct ctl_table *entry =
+		kmalloc(n * sizeof(struct ctl_table), GFP_KERNEL);
+
+	BUG_ON(!entry);
+	memset(entry, 0, n * sizeof(struct ctl_table));
+
+	return entry;
+}
+
+static void
+set_table_entry(struct ctl_table *entry, int ctl_name,
+		const char *procname, void *data, int maxlen,
+		mode_t mode, proc_handler *proc_handler)
+{
+	entry->ctl_name = ctl_name;
+	entry->procname = procname;
+	entry->data = data;
+	entry->maxlen = maxlen;
+	entry->mode = mode;
+	entry->proc_handler = proc_handler;
+}
+
+static struct ctl_table *
+sd_alloc_ctl_domain_table(struct sched_domain *sd)
+{
+	struct ctl_table *table = sd_alloc_ctl_entry(14);
+
+	set_table_entry(&table[0], 1, "min_interval", &sd->min_interval,
+		sizeof(long), 0644, proc_doulongvec_minmax);
+	set_table_entry(&table[1], 2, "max_interval", &sd->max_interval,
+		sizeof(long), 0644, proc_doulongvec_minmax);
+	set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[9], 10, "cache_hot_time", &sd->cache_hot_time,
+		sizeof(long long), 0644, proc_doulongvec_minmax);
+	set_table_entry(&table[10], 11, "cache_nice_tries",
+		&sd->cache_nice_tries,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[12], 13, "flags", &sd->flags,
+		sizeof(int), 0644, proc_dointvec_minmax);
+
+	return table;
+}
+
+static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
+{
+	struct ctl_table *entry, *table;
+	struct sched_domain *sd;
+	int domain_num = 0, i;
+	char buf[32];
+
+	for_each_domain(cpu, sd)
+		domain_num++;
+	entry = table = sd_alloc_ctl_entry(domain_num + 1);
+
+	i = 0;
+	for_each_domain(cpu, sd) {
+		snprintf(buf, 32, "domain%d", i);
+		entry->ctl_name = i + 1;
+		entry->procname = kstrdup(buf, GFP_KERNEL);
+		entry->mode = 0755;
+		entry->child = sd_alloc_ctl_domain_table(sd);
+		entry++;
+		i++;
+	}
+	return table;
+}
+
+static struct ctl_table_header *sd_sysctl_header;
+static void init_sched_domain_sysctl(void)
+{
+	int i, cpu_num = num_online_cpus();
+	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
+	char buf[32];
+
+	sd_ctl_dir[0].child = entry;
+
+	for (i = 0; i < cpu_num; i++, entry++) {
+		snprintf(buf, 32, "cpu%d", i);
+		entry->ctl_name = i + 1;
+		entry->procname = kstrdup(buf, GFP_KERNEL);
+		entry->mode = 0755;
+		entry->child = sd_alloc_ctl_cpu_table(i);
+	}
+	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
+}
+#else
+static void init_sched_domain_sysctl(void)
+{
+}
+#endif
+
 /*
  * migration_call - callback that gets triggered when a CPU is added.
  * Here we can start up the necessary migration thread for the new CPU.
@@ -6249,6 +6431,8 @@ void __init sched_init_smp(void)
 	/* XXX: Theoretical race here - CPU may be hotplugged now */
 	hotcpu_notifier(update_sched_domains, 0);
 
+	init_sched_domain_sysctl();
+
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed(current, non_isolated_cpus) < 0)
 		BUG();
@@ -6335,6 +6519,10 @@ void __init sched_init(void)
 
 	set_load_weight(&init_task);
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	INIT_HLIST_HEAD(&init_task.preempt_notifiers);
+#endif
+
 #ifdef CONFIG_SMP
 	nr_cpu_ids = highest_cpu + 1;
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
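
The sysctl tables above are what makes the sched-domains tree runtime-tweakable: sd_ctl_root hangs a "sched_domain" directory under "kernel", init_sched_domain_sysctl() creates one "cpu<N>" directory per online cpu, and each domain below it gets a "domain<M>" directory holding the per-domain tunables (min_interval, max_interval, busy_idx, ...). Assuming the /proc/sys/kernel/sched_domain/cpu<N>/domain<M>/ layout those registered names imply, a small illustrative user-space reader/writer (not part of the patch) might be:

/* Illustrative only: read and double min_interval for cpu0's first
 * sched domain, using the path layout implied by the ctl_table names above.
 */
#include <stdio.h>

static const char *path =
	"/proc/sys/kernel/sched_domain/cpu0/domain0/min_interval";

int main(void)
{
	unsigned long val;
	FILE *f = fopen(path, "r");

	if (!f || fscanf(f, "%lu", &val) != 1) {
		perror(path);
		return 1;
	}
	fclose(f);
	printf("min_interval was %lu\n", val);

	f = fopen(path, "w");
	if (!f || fprintf(f, "%lu\n", val * 2) < 0) {
		perror(path);
		return 1;
	}
	fclose(f);
	return 0;
}

The directories are registered 0755 and the individual entries 0644, so reading the tunables works for any user while writing them needs root.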
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 29f2c21e7da2..42970f723a97 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -186,7 +186,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-void sysrq_sched_debug_show(void)
+static void sysrq_sched_debug_show(void)
 {
 	sched_debug_show(NULL, NULL);
 }