 Documentation/sched-stats.txt | 195
 include/linux/preempt.h       |  44
 include/linux/sched.h         |  23
 kernel/Kconfig.preempt        |   3
 kernel/sched.c                | 204
 kernel/sched_debug.c          |   2
 6 files changed, 365 insertions(+), 106 deletions(-)
diff --git a/Documentation/sched-stats.txt b/Documentation/sched-stats.txt
index 6f72021aae51..442e14d35dea 100644
--- a/Documentation/sched-stats.txt
+++ b/Documentation/sched-stats.txt
@@ -1,10 +1,11 @@
-Version 10 of schedstats includes support for sched_domains, which
-hit the mainline kernel in 2.6.7. Some counters make more sense to be
-per-runqueue; other to be per-domain. Note that domains (and their associated
-information) will only be pertinent and available on machines utilizing
-CONFIG_SMP.
-
-In version 10 of schedstat, there is at least one level of domain
+Version 14 of schedstats includes support for sched_domains, which hit the
+mainline kernel in 2.6.20 although it is identical to the stats from version
+12 which was in the kernel from 2.6.13-2.6.19 (version 13 never saw a kernel
+release). Some counters make more sense to be per-runqueue; other to be
+per-domain. Note that domains (and their associated information) will only
+be pertinent and available on machines utilizing CONFIG_SMP.
+
+In version 14 of schedstat, there is at least one level of domain
 statistics for each cpu listed, and there may well be more than one
 domain. Domains have no particular names in this implementation, but
 the highest numbered one typically arbitrates balancing across all the
@@ -27,7 +28,7 @@ to write their own scripts, the fields are described here.
 
 CPU statistics
 --------------
-cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
+cpu<N> 1 2 3 4 5 6 7 8 9 10 11 12
 
 NOTE: In the sched_yield() statistics, the active queue is considered empty
     if it has only one process in it, since obviously the process calling
@@ -39,48 +40,20 @@ First four fields are sched_yield() statistics:
  3) # of times just the expired queue was empty
  4) # of times sched_yield() was called
 
-Next four are schedule() statistics:
- 5) # of times the active queue had at least one other process on it
- 6) # of times we switched to the expired queue and reused it
- 7) # of times schedule() was called
- 8) # of times schedule() left the processor idle
-
-Next four are active_load_balance() statistics:
- 9) # of times active_load_balance() was called
- 10) # of times active_load_balance() caused this cpu to gain a task
- 11) # of times active_load_balance() caused this cpu to lose a task
- 12) # of times active_load_balance() tried to move a task and failed
-
-Next three are try_to_wake_up() statistics:
- 13) # of times try_to_wake_up() was called
- 14) # of times try_to_wake_up() successfully moved the awakening task
- 15) # of times try_to_wake_up() attempted to move the awakening task
-
-Next two are wake_up_new_task() statistics:
- 16) # of times wake_up_new_task() was called
- 17) # of times wake_up_new_task() successfully moved the new task
-
-Next one is a sched_migrate_task() statistic:
- 18) # of times sched_migrate_task() was called
+Next three are schedule() statistics:
+ 5) # of times we switched to the expired queue and reused it
+ 6) # of times schedule() was called
+ 7) # of times schedule() left the processor idle
 
-Next one is a sched_balance_exec() statistic:
- 19) # of times sched_balance_exec() was called
+Next two are try_to_wake_up() statistics:
+ 8) # of times try_to_wake_up() was called
+ 9) # of times try_to_wake_up() was called to wake up the local cpu
 
 Next three are statistics describing scheduling latency:
- 20) sum of all time spent running by tasks on this processor (in ms)
- 21) sum of all time spent waiting to run by tasks on this processor (in ms)
- 22) # of tasks (not necessarily unique) given to the processor
-
-The last six are statistics dealing with pull_task():
- 23) # of times pull_task() moved a task to this cpu when newly idle
- 24) # of times pull_task() stole a task from this cpu when another cpu
-     was newly idle
- 25) # of times pull_task() moved a task to this cpu when idle
- 26) # of times pull_task() stole a task from this cpu when another cpu
-     was idle
- 27) # of times pull_task() moved a task to this cpu when busy
- 28) # of times pull_task() stole a task from this cpu when another cpu
-     was busy
+ 10) sum of all time spent running by tasks on this processor (in jiffies)
+ 11) sum of all time spent waiting to run by tasks on this processor (in
+     jiffies)
+ 12) # of timeslices run on this cpu
 
 
 Domain statistics
@@ -89,65 +62,95 @@ One of these is produced per domain for each cpu described. (Note that if
 CONFIG_SMP is not defined, *no* domains are utilized and these lines
 will not appear in the output.)
 
-domain<N> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
 
 The first field is a bit mask indicating what cpus this domain operates over.
 
-The next fifteen are a variety of load_balance() statistics:
-
- 1) # of times in this domain load_balance() was called when the cpu
-    was idle
- 2) # of times in this domain load_balance() was called when the cpu
-    was busy
- 3) # of times in this domain load_balance() was called when the cpu
-    was just becoming idle
- 4) # of times in this domain load_balance() tried to move one or more
-    tasks and failed, when the cpu was idle
- 5) # of times in this domain load_balance() tried to move one or more
-    tasks and failed, when the cpu was busy
- 6) # of times in this domain load_balance() tried to move one or more
-    tasks and failed, when the cpu was just becoming idle
- 7) sum of imbalances discovered (if any) with each call to
-    load_balance() in this domain when the cpu was idle
- 8) sum of imbalances discovered (if any) with each call to
-    load_balance() in this domain when the cpu was busy
- 9) sum of imbalances discovered (if any) with each call to
-    load_balance() in this domain when the cpu was just becoming idle
- 10) # of times in this domain load_balance() was called but did not find
-    a busier queue while the cpu was idle
- 11) # of times in this domain load_balance() was called but did not find
-    a busier queue while the cpu was busy
- 12) # of times in this domain load_balance() was called but did not find
-    a busier queue while the cpu was just becoming idle
- 13) # of times in this domain a busier queue was found while the cpu was
-    idle but no busier group was found
- 14) # of times in this domain a busier queue was found while the cpu was
-    busy but no busier group was found
- 15) # of times in this domain a busier queue was found while the cpu was
-    just becoming idle but no busier group was found
-
-Next two are sched_balance_exec() statistics:
- 17) # of times in this domain sched_balance_exec() successfully pushed
-    a task to a new cpu
- 18) # of times in this domain sched_balance_exec() tried but failed to
-    push a task to a new cpu
-
-Next two are try_to_wake_up() statistics:
- 19) # of times in this domain try_to_wake_up() tried to move a task based
-    on affinity and cache warmth
- 20) # of times in this domain try_to_wake_up() tried to move a task based
-    on load balancing
-
+The next 24 are a variety of load_balance() statistics grouped into types
+of idleness (idle, busy, and newly idle):
+
+    1) # of times in this domain load_balance() was called when the
+       cpu was idle
+    2) # of times in this domain load_balance() checked but found
+       the load did not require balancing when the cpu was idle
+    3) # of times in this domain load_balance() tried to move one or
+       more tasks and failed, when the cpu was idle
+    4) sum of imbalances discovered (if any) with each call to
+       load_balance() in this domain when the cpu was idle
+    5) # of times in this domain pull_task() was called when the cpu
+       was idle
+    6) # of times in this domain pull_task() was called even though
+       the target task was cache-hot when idle
+    7) # of times in this domain load_balance() was called but did
+       not find a busier queue while the cpu was idle
+    8) # of times in this domain a busier queue was found while the
+       cpu was idle but no busier group was found
+
+    9) # of times in this domain load_balance() was called when the
+       cpu was busy
+   10) # of times in this domain load_balance() checked but found the
+       load did not require balancing when busy
+   11) # of times in this domain load_balance() tried to move one or
+       more tasks and failed, when the cpu was busy
+   12) sum of imbalances discovered (if any) with each call to
+       load_balance() in this domain when the cpu was busy
+   13) # of times in this domain pull_task() was called when busy
+   14) # of times in this domain pull_task() was called even though the
+       target task was cache-hot when busy
+   15) # of times in this domain load_balance() was called but did not
+       find a busier queue while the cpu was busy
+   16) # of times in this domain a busier queue was found while the cpu
+       was busy but no busier group was found
+
+   17) # of times in this domain load_balance() was called when the
+       cpu was just becoming idle
+   18) # of times in this domain load_balance() checked but found the
+       load did not require balancing when the cpu was just becoming idle
+   19) # of times in this domain load_balance() tried to move one or more
+       tasks and failed, when the cpu was just becoming idle
+   20) sum of imbalances discovered (if any) with each call to
+       load_balance() in this domain when the cpu was just becoming idle
+   21) # of times in this domain pull_task() was called when newly idle
+   22) # of times in this domain pull_task() was called even though the
+       target task was cache-hot when just becoming idle
+   23) # of times in this domain load_balance() was called but did not
+       find a busier queue while the cpu was just becoming idle
+   24) # of times in this domain a busier queue was found while the cpu
+       was just becoming idle but no busier group was found
+
+   Next three are active_load_balance() statistics:
+   25) # of times active_load_balance() was called
+   26) # of times active_load_balance() tried to move a task and failed
+   27) # of times active_load_balance() successfully moved a task
+
+   Next three are sched_balance_exec() statistics:
+   28) sbe_cnt is not used
+   29) sbe_balanced is not used
+   30) sbe_pushed is not used
+
+   Next three are sched_balance_fork() statistics:
+   31) sbf_cnt is not used
+   32) sbf_balanced is not used
+   33) sbf_pushed is not used
+
+   Next three are try_to_wake_up() statistics:
+   34) # of times in this domain try_to_wake_up() awoke a task that
+       last ran on a different cpu in this domain
+   35) # of times in this domain try_to_wake_up() moved a task to the
+       waking cpu because it was cache-cold on its own cpu anyway
+   36) # of times in this domain try_to_wake_up() started passive balancing
 
 /proc/<pid>/schedstat
 ----------------
 schedstats also adds a new /proc/<pid>/schedstat file to include some of
 the same information on a per-process level. There are three fields in
-this file correlating to fields 20, 21, and 22 in the CPU fields, but
-they only apply for that process.
+this file correlating for that process to:
+  1) time spent on the cpu
+  2) time spent waiting on a runqueue
+  3) # of timeslices run on this cpu
 
 A program could be easily written to make use of these extra fields to
 report on how well a particular process or set of processes is faring
 under the scheduler's policies. A simple version of such a program is
 available at
-    http://eaglet.rain.com/rick/linux/schedstat/v10/latency.c
+    http://eaglet.rain.com/rick/linux/schedstat/v12/latency.c
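
The latency.c program referenced above is not reproduced here; as a minimal
hypothetical sketch of the same idea (the file name and field order are taken
from the description above, and the units of the first two fields are left to
whatever the running kernel reports), a reader of the per-process file could
look like this:

/* schedstat_read.c - hypothetical example, not part of this patch */
#include <stdio.h>

int main(int argc, char **argv)
{
	char path[64];
	unsigned long long run, wait, slices;
	FILE *f;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <pid>\n", argv[0]);
		return 1;
	}
	snprintf(path, sizeof(path), "/proc/%s/schedstat", argv[1]);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	/* fields: time on cpu, time waiting on a runqueue, # of timeslices */
	if (fscanf(f, "%llu %llu %llu", &run, &wait, &slices) == 3)
		printf("run %llu wait %llu timeslices %llu\n",
		       run, wait, slices);
	fclose(f);
	return 0;
}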
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index d0926d63406c..484988ed301e 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -8,6 +8,7 @@
 
 #include <linux/thread_info.h>
 #include <linux/linkage.h>
+#include <linux/list.h>
 
 #ifdef CONFIG_DEBUG_PREEMPT
   extern void fastcall add_preempt_count(int val);
@@ -60,4 +61,47 @@ do { \
 
 #endif
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+struct preempt_notifier;
+
+/**
+ * preempt_ops - notifiers called when a task is preempted and rescheduled
+ * @sched_in: we're about to be rescheduled:
+ *    notifier: struct preempt_notifier for the task being scheduled
+ *    cpu:  cpu we're scheduled on
+ * @sched_out: we've just been preempted
+ *    notifier: struct preempt_notifier for the task being preempted
+ *    next: the task that's kicking us out
+ */
+struct preempt_ops {
+	void (*sched_in)(struct preempt_notifier *notifier, int cpu);
+	void (*sched_out)(struct preempt_notifier *notifier,
+			  struct task_struct *next);
+};
+
+/**
+ * preempt_notifier - key for installing preemption notifiers
+ * @link: internal use
+ * @ops: defines the notifier functions to be called
+ *
+ * Usually used in conjunction with container_of().
+ */
+struct preempt_notifier {
+	struct hlist_node link;
+	struct preempt_ops *ops;
+};
+
+void preempt_notifier_register(struct preempt_notifier *notifier);
+void preempt_notifier_unregister(struct preempt_notifier *notifier);
+
+static inline void preempt_notifier_init(struct preempt_notifier *notifier,
+					 struct preempt_ops *ops)
+{
+	INIT_HLIST_NODE(&notifier->link);
+	notifier->ops = ops;
+}
+
+#endif
+
 #endif /* __LINUX_PREEMPT_H */
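
A hypothetical usage sketch of the notifier interface added above (client code
assumed, not part of this patch): a subsystem embeds a struct preempt_notifier
in its own per-task state, recovers it with container_of() from the callbacks
as the kerneldoc suggests, and registers it from the task that wants to be
notified:

/* hypothetical client, not part of this patch */
#include <linux/kernel.h>
#include <linux/preempt.h>
#include <linux/sched.h>

struct my_vcpu {
	struct preempt_notifier pn;
	/* ... per-task subsystem state ... */
};

static void my_sched_in(struct preempt_notifier *pn, int cpu)
{
	struct my_vcpu *vcpu = container_of(pn, struct my_vcpu, pn);

	/* current is running again on 'cpu': reload state for vcpu */
	(void)vcpu;
}

static void my_sched_out(struct preempt_notifier *pn,
			 struct task_struct *next)
{
	struct my_vcpu *vcpu = container_of(pn, struct my_vcpu, pn);

	/* current is being preempted in favour of 'next': save vcpu state */
	(void)vcpu;
}

static struct preempt_ops my_preempt_ops = {
	.sched_in  = my_sched_in,
	.sched_out = my_sched_out,
};

/* called by the task that wants to hear about its own preemption */
static void my_vcpu_load(struct my_vcpu *vcpu)
{
	preempt_notifier_init(&vcpu->pn, &my_preempt_ops);
	preempt_notifier_register(&vcpu->pn);
}

Since the new PREEMPT_NOTIFIERS option added in kernel/Kconfig.preempt below
is a silent bool, such a client would be expected to select it from its own
Kconfig entry.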
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 33b9b4841ee7..2e490271acf6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -681,7 +681,7 @@ enum cpu_idle_type {
 #define SCHED_LOAD_SHIFT	10
 #define SCHED_LOAD_SCALE	(1L << SCHED_LOAD_SHIFT)
 
-#define SCHED_LOAD_SCALE_FUZZ	(SCHED_LOAD_SCALE >> 5)
+#define SCHED_LOAD_SCALE_FUZZ	(SCHED_LOAD_SCALE >> 1)
 
 #ifdef CONFIG_SMP
 #define SD_LOAD_BALANCE		1	/* Do load balancing on this domain. */
@@ -786,6 +786,22 @@ extern int partition_sched_domains(cpumask_t *partition1,
 
 #endif	/* CONFIG_SMP */
 
+/*
+ * A runqueue laden with a single nice 0 task scores a weighted_cpuload of
+ * SCHED_LOAD_SCALE. This function returns 1 if any cpu is laden with a
+ * task of nice 0 or enough lower priority tasks to bring up the
+ * weighted_cpuload
+ */
+static inline int above_background_load(void)
+{
+	unsigned long cpu;
+
+	for_each_online_cpu(cpu) {
+		if (weighted_cpuload(cpu) >= SCHED_LOAD_SCALE)
+			return 1;
+	}
+	return 0;
+}
 
 struct io_context;	/* See blkdev.h */
 struct cpuset;
@@ -935,6 +951,11 @@ struct task_struct {
 	struct sched_class *sched_class;
 	struct sched_entity se;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	/* list of struct preempt_notifier: */
+	struct hlist_head preempt_notifiers;
+#endif
+
 	unsigned short ioprio;
 #ifdef CONFIG_BLK_DEV_IO_TRACE
 	unsigned int btrace_seq;
diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index c64ce9c14207..6b066632e40c 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -63,3 +63,6 @@ config PREEMPT_BKL
 	  Say Y here if you are building a kernel for a desktop system.
 	  Say N if you are unsure.
 
+config PREEMPT_NOTIFIERS
+	bool
+
diff --git a/kernel/sched.c b/kernel/sched.c
index 93cf241cfbe9..5c51d7e5dcc1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -53,6 +53,7 @@
 #include <linux/percpu.h>
 #include <linux/kthread.h>
 #include <linux/seq_file.h>
+#include <linux/sysctl.h>
 #include <linux/syscalls.h>
 #include <linux/times.h>
 #include <linux/tsacct_kern.h>
@@ -263,8 +264,6 @@ struct rq {
 	unsigned int clock_warps, clock_overflows;
 	unsigned int clock_unstable_events;
 
-	struct sched_class *load_balance_class;
-
 	atomic_t nr_iowait;
 
 #ifdef CONFIG_SMP
@@ -385,13 +384,12 @@ static inline unsigned long long rq_clock(struct rq *rq)
  */
 unsigned long long cpu_clock(int cpu)
 {
-	struct rq *rq = cpu_rq(cpu);
 	unsigned long long now;
 	unsigned long flags;
 
-	spin_lock_irqsave(&rq->lock, flags);
-	now = rq_clock(rq);
-	spin_unlock_irqrestore(&rq->lock, flags);
+	local_irq_save(flags);
+	now = rq_clock(cpu_rq(cpu));
+	local_irq_restore(flags);
 
 	return now;
 }
@@ -1592,6 +1590,10 @@ static void __sched_fork(struct task_struct *p)
 	INIT_LIST_HEAD(&p->run_list);
 	p->se.on_rq = 0;
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	INIT_HLIST_HEAD(&p->preempt_notifiers);
+#endif
+
 	/*
 	 * We mark the process as running here, but have not actually
 	 * inserted it onto the runqueue yet. This guarantees that
@@ -1673,6 +1675,63 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	task_rq_unlock(rq, &flags);
 }
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+
+/**
+ * preempt_notifier_register - tell me when current is being preempted
+ * and rescheduled
+ */
+void preempt_notifier_register(struct preempt_notifier *notifier)
+{
+	hlist_add_head(&notifier->link, &current->preempt_notifiers);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_register);
+
+/**
+ * preempt_notifier_unregister - no longer interested in preemption notifications
+ *
+ * This is safe to call from within a preemption notifier.
+ */
+void preempt_notifier_unregister(struct preempt_notifier *notifier)
+{
+	hlist_del(&notifier->link);
+}
+EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+	struct preempt_notifier *notifier;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+		notifier->ops->sched_in(notifier, raw_smp_processor_id());
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+				 struct task_struct *next)
+{
+	struct preempt_notifier *notifier;
+	struct hlist_node *node;
+
+	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+		notifier->ops->sched_out(notifier, next);
+}
+
+#else
+
+static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
+{
+}
+
+static void
+fire_sched_out_preempt_notifiers(struct task_struct *curr,
+				 struct task_struct *next)
+{
+}
+
+#endif
+
 /**
  * prepare_task_switch - prepare to switch tasks
  * @rq: the runqueue preparing to switch
@@ -1685,8 +1744,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
  * prepare_task_switch sets up locking and calls architecture specific
  * hooks.
  */
-static inline void prepare_task_switch(struct rq *rq, struct task_struct *next)
+static inline void
+prepare_task_switch(struct rq *rq, struct task_struct *prev,
+		    struct task_struct *next)
 {
+	fire_sched_out_preempt_notifiers(prev, next);
 	prepare_lock_switch(rq, next);
 	prepare_arch_switch(next);
 }
@@ -1728,6 +1790,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	prev_state = prev->state;
 	finish_arch_switch(prev);
 	finish_lock_switch(rq, prev);
+	fire_sched_in_preempt_notifiers(current);
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_state == TASK_DEAD)) {
@@ -1768,7 +1831,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 {
 	struct mm_struct *mm, *oldmm;
 
-	prepare_task_switch(rq, next);
+	prepare_task_switch(rq, prev, next);
 	mm = next->mm;
 	oldmm = prev->active_mm;
 	/*
@@ -5140,10 +5203,129 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 		if (!next)
 			break;
 		migrate_dead(dead_cpu, next);
+
 	}
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
+
+static struct ctl_table sd_ctl_dir[] = {
+	{CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, },
+	{0,},
+};
+
+static struct ctl_table sd_ctl_root[] = {
+	{CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, },
+	{0,},
+};
+
+static struct ctl_table *sd_alloc_ctl_entry(int n)
+{
+	struct ctl_table *entry =
+		kmalloc(n * sizeof(struct ctl_table), GFP_KERNEL);
+
+	BUG_ON(!entry);
+	memset(entry, 0, n * sizeof(struct ctl_table));
+
+	return entry;
+}
+
+static void
+set_table_entry(struct ctl_table *entry, int ctl_name,
+		const char *procname, void *data, int maxlen,
+		mode_t mode, proc_handler *proc_handler)
+{
+	entry->ctl_name = ctl_name;
+	entry->procname = procname;
+	entry->data = data;
+	entry->maxlen = maxlen;
+	entry->mode = mode;
+	entry->proc_handler = proc_handler;
+}
+
+static struct ctl_table *
+sd_alloc_ctl_domain_table(struct sched_domain *sd)
+{
+	struct ctl_table *table = sd_alloc_ctl_entry(14);
+
+	set_table_entry(&table[0], 1, "min_interval", &sd->min_interval,
+		sizeof(long), 0644, proc_doulongvec_minmax);
+	set_table_entry(&table[1], 2, "max_interval", &sd->max_interval,
+		sizeof(long), 0644, proc_doulongvec_minmax);
+	set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[9], 10, "cache_hot_time", &sd->cache_hot_time,
+		sizeof(long long), 0644, proc_doulongvec_minmax);
+	set_table_entry(&table[10], 11, "cache_nice_tries",
+		&sd->cache_nice_tries,
+		sizeof(int), 0644, proc_dointvec_minmax);
+	set_table_entry(&table[12], 13, "flags", &sd->flags,
+		sizeof(int), 0644, proc_dointvec_minmax);
+
+	return table;
+}
+
+static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
+{
+	struct ctl_table *entry, *table;
+	struct sched_domain *sd;
+	int domain_num = 0, i;
+	char buf[32];
+
+	for_each_domain(cpu, sd)
+		domain_num++;
+	entry = table = sd_alloc_ctl_entry(domain_num + 1);
+
+	i = 0;
+	for_each_domain(cpu, sd) {
+		snprintf(buf, 32, "domain%d", i);
+		entry->ctl_name = i + 1;
+		entry->procname = kstrdup(buf, GFP_KERNEL);
+		entry->mode = 0755;
+		entry->child = sd_alloc_ctl_domain_table(sd);
+		entry++;
+		i++;
+	}
+	return table;
+}
+
+static struct ctl_table_header *sd_sysctl_header;
+static void init_sched_domain_sysctl(void)
+{
+	int i, cpu_num = num_online_cpus();
+	struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
+	char buf[32];
+
+	sd_ctl_dir[0].child = entry;
+
+	for (i = 0; i < cpu_num; i++, entry++) {
+		snprintf(buf, 32, "cpu%d", i);
+		entry->ctl_name = i + 1;
+		entry->procname = kstrdup(buf, GFP_KERNEL);
+		entry->mode = 0755;
+		entry->child = sd_alloc_ctl_cpu_table(i);
+	}
+	sd_sysctl_header = register_sysctl_table(sd_ctl_root);
+}
+#else
+static void init_sched_domain_sysctl(void)
+{
+}
+#endif
+
 /*
  * migration_call - callback that gets triggered when a CPU is added.
  * Here we can start up the necessary migration thread for the new CPU.
@@ -6249,6 +6431,8 @@ void __init sched_init_smp(void)
 	/* XXX: Theoretical race here - CPU may be hotplugged now */
 	hotcpu_notifier(update_sched_domains, 0);
 
+	init_sched_domain_sysctl();
+
 	/* Move init over to a non-isolated CPU */
 	if (set_cpus_allowed(current, non_isolated_cpus) < 0)
 		BUG();
@@ -6335,6 +6519,10 @@ void __init sched_init(void)
 
 	set_load_weight(&init_task);
 
+#ifdef CONFIG_PREEMPT_NOTIFIERS
+	INIT_HLIST_HEAD(&init_task.preempt_notifiers);
+#endif
+
 #ifdef CONFIG_SMP
 	nr_cpu_ids = highest_cpu + 1;
 	open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);
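
The sysctl tree registered by init_sched_domain_sysctl() above is exposed
through /proc/sys, so on an SMP kernel built with CONFIG_SCHED_DEBUG and
CONFIG_SYSCTL the per-domain tunables should appear as
/proc/sys/kernel/sched_domain/cpu<N>/domain<N>/{min_interval,max_interval,...}.
A minimal hypothetical userspace reader, assuming cpu0 has at least one
domain:

/* sd_tunable_read.c - hypothetical example, not part of this patch */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/proc/sys/kernel/sched_domain/cpu0/domain0/min_interval";
	char buf[64];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s: %s", path, buf);
	fclose(f);
	return 0;
}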
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 29f2c21e7da2..42970f723a97 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -186,7 +186,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	return 0;
 }
 
-void sysrq_sched_debug_show(void)
+static void sysrq_sched_debug_show(void)
 {
 	sched_debug_show(NULL, NULL);
 }