about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- kernel/sched.c 204
1 files changed, 196 insertions, 8 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 93cf241cfbe9..5c51d7e5dcc1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -53,6 +53,7 @@
53#include <linux/percpu.h> 53#include <linux/percpu.h>
54#include <linux/kthread.h> 54#include <linux/kthread.h>
55#include <linux/seq_file.h> 55#include <linux/seq_file.h>
56#include <linux/sysctl.h>
56#include <linux/syscalls.h> 57#include <linux/syscalls.h>
57#include <linux/times.h> 58#include <linux/times.h>
58#include <linux/tsacct_kern.h> 59#include <linux/tsacct_kern.h>
@@ -263,8 +264,6 @@ struct rq {
263 unsigned int clock_warps, clock_overflows; 264 unsigned int clock_warps, clock_overflows;
264 unsigned int clock_unstable_events; 265 unsigned int clock_unstable_events;
265 266
266 struct sched_class *load_balance_class;
267
268 atomic_t nr_iowait; 267 atomic_t nr_iowait;
269 268
270#ifdef CONFIG_SMP 269#ifdef CONFIG_SMP
@@ -385,13 +384,12 @@ static inline unsigned long long rq_clock(struct rq *rq)
385 */ 384 */
386unsigned long long cpu_clock(int cpu) 385unsigned long long cpu_clock(int cpu)
387{ 386{
388 struct rq *rq = cpu_rq(cpu);
389 unsigned long long now; 387 unsigned long long now;
390 unsigned long flags; 388 unsigned long flags;
391 389
392 spin_lock_irqsave(&rq->lock, flags); 390 local_irq_save(flags);
393 now = rq_clock(rq); 391 now = rq_clock(cpu_rq(cpu));
394 spin_unlock_irqrestore(&rq->lock, flags); 392 local_irq_restore(flags);
395 393
396 return now; 394 return now;
397} 395}
@@ -1592,6 +1590,10 @@ static void __sched_fork(struct task_struct *p)
1592 INIT_LIST_HEAD(&p->run_list); 1590 INIT_LIST_HEAD(&p->run_list);
1593 p->se.on_rq = 0; 1591 p->se.on_rq = 0;
1594 1592
1593#ifdef CONFIG_PREEMPT_NOTIFIERS
1594 INIT_HLIST_HEAD(&p->preempt_notifiers);
1595#endif
1596
1595 /* 1597 /*
1596 * We mark the process as running here, but have not actually 1598 * We mark the process as running here, but have not actually
1597 * inserted it onto the runqueue yet. This guarantees that 1599 * inserted it onto the runqueue yet. This guarantees that
@@ -1673,6 +1675,63 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
1673 task_rq_unlock(rq, &flags); 1675 task_rq_unlock(rq, &flags);
1674} 1676}
1675 1677
1678#ifdef CONFIG_PREEMPT_NOTIFIERS
1679
1680/**
1681 * preempt_notifier_register - tell me when current is being preempted
1682 * and rescheduled
1683 */
1684void preempt_notifier_register(struct preempt_notifier *notifier)
1685{
1686 hlist_add_head(&notifier->link, &current->preempt_notifiers);
1687}
1688EXPORT_SYMBOL_GPL(preempt_notifier_register);
1689
1690/**
1691 * preempt_notifier_unregister - no longer interested in preemption notifications
1692 *
1693 * This is safe to call from within a preemption notifier.
1694 */
1695void preempt_notifier_unregister(struct preempt_notifier *notifier)
1696{
1697 hlist_del(&notifier->link);
1698}
1699EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
1700
1701static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
1702{
1703 struct preempt_notifier *notifier;
1704 struct hlist_node *node;
1705
1706 hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
1707 notifier->ops->sched_in(notifier, raw_smp_processor_id());
1708}
1709
1710static void
1711fire_sched_out_preempt_notifiers(struct task_struct *curr,
1712 struct task_struct *next)
1713{
1714 struct preempt_notifier *notifier;
1715 struct hlist_node *node;
1716
1717 hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
1718 notifier->ops->sched_out(notifier, next);
1719}
1720
1721#else
1722
1723static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
1724{
1725}
1726
1727static void
1728fire_sched_out_preempt_notifiers(struct task_struct *curr,
1729 struct task_struct *next)
1730{
1731}
1732
1733#endif
1734
1676/** 1735/**
1677 * prepare_task_switch - prepare to switch tasks 1736 * prepare_task_switch - prepare to switch tasks
1678 * @rq: the runqueue preparing to switch 1737 * @rq: the runqueue preparing to switch
@@ -1685,8 +1744,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
1685 * prepare_task_switch sets up locking and calls architecture specific 1744 * prepare_task_switch sets up locking and calls architecture specific
1686 * hooks. 1745 * hooks.
1687 */ 1746 */
1688static inline void prepare_task_switch(struct rq *rq, struct task_struct *next) 1747static inline void
1748prepare_task_switch(struct rq *rq, struct task_struct *prev,
1749 struct task_struct *next)
1689{ 1750{
1751 fire_sched_out_preempt_notifiers(prev, next);
1690 prepare_lock_switch(rq, next); 1752 prepare_lock_switch(rq, next);
1691 prepare_arch_switch(next); 1753 prepare_arch_switch(next);
1692} 1754}
@@ -1728,6 +1790,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev)
1728 prev_state = prev->state; 1790 prev_state = prev->state;
1729 finish_arch_switch(prev); 1791 finish_arch_switch(prev);
1730 finish_lock_switch(rq, prev); 1792 finish_lock_switch(rq, prev);
1793 fire_sched_in_preempt_notifiers(current);
1731 if (mm) 1794 if (mm)
1732 mmdrop(mm); 1795 mmdrop(mm);
1733 if (unlikely(prev_state == TASK_DEAD)) { 1796 if (unlikely(prev_state == TASK_DEAD)) {
@@ -1768,7 +1831,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
1768{ 1831{
1769 struct mm_struct *mm, *oldmm; 1832 struct mm_struct *mm, *oldmm;
1770 1833
1771 prepare_task_switch(rq, next); 1834 prepare_task_switch(rq, prev, next);
1772 mm = next->mm; 1835 mm = next->mm;
1773 oldmm = prev->active_mm; 1836 oldmm = prev->active_mm;
1774 /* 1837 /*
@@ -5140,10 +5203,129 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
5140 if (!next) 5203 if (!next)
5141 break; 5204 break;
5142 migrate_dead(dead_cpu, next); 5205 migrate_dead(dead_cpu, next);
5206
5143 } 5207 }
5144} 5208}
5145#endif /* CONFIG_HOTPLUG_CPU */ 5209#endif /* CONFIG_HOTPLUG_CPU */
5146 5210
5211#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
5212
5213static struct ctl_table sd_ctl_dir[] = {
5214 {CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, },
5215 {0,},
5216};
5217
5218static struct ctl_table sd_ctl_root[] = {
5219 {CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, },
5220 {0,},
5221};
5222
5223static struct ctl_table *sd_alloc_ctl_entry(int n)
5224{
5225 struct ctl_table *entry =
5226 kmalloc(n * sizeof(struct ctl_table), GFP_KERNEL);
5227
5228 BUG_ON(!entry);
5229 memset(entry, 0, n * sizeof(struct ctl_table));
5230
5231 return entry;
5232}
5233
5234static void
5235set_table_entry(struct ctl_table *entry, int ctl_name,
5236 const char *procname, void *data, int maxlen,
5237 mode_t mode, proc_handler *proc_handler)
5238{
5239 entry->ctl_name = ctl_name;
5240 entry->procname = procname;
5241 entry->data = data;
5242 entry->maxlen = maxlen;
5243 entry->mode = mode;
5244 entry->proc_handler = proc_handler;
5245}
5246
5247static struct ctl_table *
5248sd_alloc_ctl_domain_table(struct sched_domain *sd)
5249{ /* Build the sysctl table exposing the tunable fields of one sched_domain; returns the allocated table. */
5250 struct ctl_table *table = sd_alloc_ctl_entry(14); /* zero-filled by sd_alloc_ctl_entry() */
5251
5252 set_table_entry(&table[0], 1, "min_interval", &sd->min_interval,
5253 sizeof(long), 0644, proc_doulongvec_minmax);
5254 set_table_entry(&table[1], 2, "max_interval", &sd->max_interval,
5255 sizeof(long), 0644, proc_doulongvec_minmax);
5256 set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx,
5257 sizeof(int), 0644, proc_dointvec_minmax);
5258 set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx,
5259 sizeof(int), 0644, proc_dointvec_minmax);
5260 set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx,
5261 sizeof(int), 0644, proc_dointvec_minmax);
5262 set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx,
5263 sizeof(int), 0644, proc_dointvec_minmax);
5264 set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx,
5265 sizeof(int), 0644, proc_dointvec_minmax);
5266 set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor,
5267 sizeof(int), 0644, proc_dointvec_minmax);
5268 set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct,
5269 sizeof(int), 0644, proc_dointvec_minmax);
5270 set_table_entry(&table[9], 10, "cache_hot_time", &sd->cache_hot_time,
5271 sizeof(long long), 0644, proc_doulongvec_minmax);
5272 set_table_entry(&table[10], 11, "cache_nice_tries",
5273 &sd->cache_nice_tries,
5274 sizeof(int), 0644, proc_dointvec_minmax);
5275 set_table_entry(&table[11], 12, "flags", &sd->flags, /* must directly follow slot 10: a zeroed slot in between acts as the table terminator and would hide "flags" */
5276 sizeof(int), 0644, proc_dointvec_minmax);
5277
5278 return table; /* slots 12-13 stay zeroed and terminate the table */
5279}
5280
5281static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
5282{
5283 struct ctl_table *entry, *table;
5284 struct sched_domain *sd;
5285 int domain_num = 0, i;
5286 char buf[32];
5287
5288 for_each_domain(cpu, sd)
5289 domain_num++;
5290 entry = table = sd_alloc_ctl_entry(domain_num + 1);
5291
5292 i = 0;
5293 for_each_domain(cpu, sd) {
5294 snprintf(buf, 32, "domain%d", i);
5295 entry->ctl_name = i + 1;
5296 entry->procname = kstrdup(buf, GFP_KERNEL);
5297 entry->mode = 0755;
5298 entry->child = sd_alloc_ctl_domain_table(sd);
5299 entry++;
5300 i++;
5301 }
5302 return table;
5303}
5304
5305static struct ctl_table_header *sd_sysctl_header;
5306static void init_sched_domain_sysctl(void)
5307{
5308 int i, cpu_num = num_online_cpus();
5309 struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
5310 char buf[32];
5311
5312 sd_ctl_dir[0].child = entry;
5313
5314 for (i = 0; i < cpu_num; i++, entry++) {
5315 snprintf(buf, 32, "cpu%d", i);
5316 entry->ctl_name = i + 1;
5317 entry->procname = kstrdup(buf, GFP_KERNEL);
5318 entry->mode = 0755;
5319 entry->child = sd_alloc_ctl_cpu_table(i);
5320 }
5321 sd_sysctl_header = register_sysctl_table(sd_ctl_root);
5322}
5323#else
5324static void init_sched_domain_sysctl(void)
5325{
5326}
5327#endif
5328
5147/* 5329/*
5148 * migration_call - callback that gets triggered when a CPU is added. 5330 * migration_call - callback that gets triggered when a CPU is added.
5149 * Here we can start up the necessary migration thread for the new CPU. 5331 * Here we can start up the necessary migration thread for the new CPU.
@@ -6249,6 +6431,8 @@ void __init sched_init_smp(void)
6249 /* XXX: Theoretical race here - CPU may be hotplugged now */ 6431 /* XXX: Theoretical race here - CPU may be hotplugged now */
6250 hotcpu_notifier(update_sched_domains, 0); 6432 hotcpu_notifier(update_sched_domains, 0);
6251 6433
6434 init_sched_domain_sysctl();
6435
6252 /* Move init over to a non-isolated CPU */ 6436 /* Move init over to a non-isolated CPU */
6253 if (set_cpus_allowed(current, non_isolated_cpus) < 0) 6437 if (set_cpus_allowed(current, non_isolated_cpus) < 0)
6254 BUG(); 6438 BUG();
@@ -6335,6 +6519,10 @@ void __init sched_init(void)
6335 6519
6336 set_load_weight(&init_task); 6520 set_load_weight(&init_task);
6337 6521
6522#ifdef CONFIG_PREEMPT_NOTIFIERS
6523 INIT_HLIST_HEAD(&init_task.preempt_notifiers);
6524#endif
6525
6338#ifdef CONFIG_SMP 6526#ifdef CONFIG_SMP
6339 nr_cpu_ids = highest_cpu + 1; 6527 nr_cpu_ids = highest_cpu + 1;
6340 open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); 6528 open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL);