From e107be36efb2a233833e8c9899039a370e4b2318 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 26 Jul 2007 13:40:43 +0200 Subject: [PATCH] sched: arch preempt notifier mechanism This adds a general mechanism whereby a task can request the scheduler to notify it whenever it is preempted or scheduled back in. This allows the task to swap any special-purpose registers like the fpu or Intel's VT registers. Signed-off-by: Avi Kivity [ mingo@elte.hu: fixes, cleanups ] Signed-off-by: Ingo Molnar --- kernel/Kconfig.preempt | 3 +++ kernel/sched.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index c64ce9c14207..6b066632e40c 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -63,3 +63,6 @@ config PREEMPT_BKL Say Y here if you are building a kernel for a desktop system. Say N if you are unsure. +config PREEMPT_NOTIFIERS + bool + diff --git a/kernel/sched.c b/kernel/sched.c index 93cf241cfbe9..e901aa59f206 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1592,6 +1592,10 @@ static void __sched_fork(struct task_struct *p) INIT_LIST_HEAD(&p->run_list); p->se.on_rq = 0; +#ifdef CONFIG_PREEMPT_NOTIFIERS + INIT_HLIST_HEAD(&p->preempt_notifiers); +#endif + /* * We mark the process as running here, but have not actually * inserted it onto the runqueue yet. This guarantees that @@ -1673,6 +1677,63 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) task_rq_unlock(rq, &flags); } +#ifdef CONFIG_PREEMPT_NOTIFIERS + +/** + * preempt_notifier_register - tell me when current is being being preempted + * and rescheduled + */ +void preempt_notifier_register(struct preempt_notifier *notifier) +{ + hlist_add_head(¬ifier->link, ¤t->preempt_notifiers); +} +EXPORT_SYMBOL_GPL(preempt_notifier_register); + +/** + * preempt_notifier_unregister - no longer interested in preemption notifications + * + * This is safe to call from within a preemption notifier. + */ +void preempt_notifier_unregister(struct preempt_notifier *notifier) +{ + hlist_del(¬ifier->link); +} +EXPORT_SYMBOL_GPL(preempt_notifier_unregister); + +static void fire_sched_in_preempt_notifiers(struct task_struct *curr) +{ + struct preempt_notifier *notifier; + struct hlist_node *node; + + hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + notifier->ops->sched_in(notifier, raw_smp_processor_id()); +} + +static void +fire_sched_out_preempt_notifiers(struct task_struct *curr, + struct task_struct *next) +{ + struct preempt_notifier *notifier; + struct hlist_node *node; + + hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + notifier->ops->sched_out(notifier, next); +} + +#else + +static void fire_sched_in_preempt_notifiers(struct task_struct *curr) +{ +} + +static void +fire_sched_out_preempt_notifiers(struct task_struct *curr, + struct task_struct *next) +{ +} + +#endif + /** * prepare_task_switch - prepare to switch tasks * @rq: the runqueue preparing to switch @@ -1685,8 +1746,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * prepare_task_switch sets up locking and calls architecture specific * hooks. */ -static inline void prepare_task_switch(struct rq *rq, struct task_struct *next) +static inline void +prepare_task_switch(struct rq *rq, struct task_struct *prev, + struct task_struct *next) { + fire_sched_out_preempt_notifiers(prev, next); prepare_lock_switch(rq, next); prepare_arch_switch(next); } @@ -1728,6 +1792,7 @@ static inline void finish_task_switch(struct rq *rq, struct task_struct *prev) prev_state = prev->state; finish_arch_switch(prev); finish_lock_switch(rq, prev); + fire_sched_in_preempt_notifiers(current); if (mm) mmdrop(mm); if (unlikely(prev_state == TASK_DEAD)) { @@ -1768,7 +1833,7 @@ context_switch(struct rq *rq, struct task_struct *prev, { struct mm_struct *mm, *oldmm; - prepare_task_switch(rq, next); + prepare_task_switch(rq, prev, next); mm = next->mm; oldmm = prev->active_mm; /* @@ -6335,6 +6400,10 @@ void __init sched_init(void) set_load_weight(&init_task); +#ifdef CONFIG_PREEMPT_NOTIFIERS + INIT_HLIST_HEAD(&init_task.preempt_notifiers); +#endif + #ifdef CONFIG_SMP nr_cpu_ids = highest_cpu + 1; open_softirq(SCHED_SOFTIRQ, run_rebalance_domains, NULL); -- cgit v1.2.2 From 018a2212950457b1093e504cd834aa0fe749da6c Mon Sep 17 00:00:00 2001 From: Satoru Takeuchi Date: Thu, 26 Jul 2007 13:40:43 +0200 Subject: [PATCH] sched: remove unused rq->load_balance_class Remove unused rq->load_balance_class. Signed-off-by: Satoru Takeuchi Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index e901aa59f206..cc6c1192c448 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -263,8 +263,6 @@ struct rq { unsigned int clock_warps, clock_overflows; unsigned int clock_unstable_events; - struct sched_class *load_balance_class; - atomic_t nr_iowait; #ifdef CONFIG_SMP -- cgit v1.2.2 From 2cd4d0ea19713304963dbb2de5073700bfe253f5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 26 Jul 2007 13:40:43 +0200 Subject: [PATCH] sched: make cpu_clock() not use the rq clock it is enough to disable interrupts to get the precise rq-clock of the local CPU. this also solves an NMI watchdog regression: the NMI watchdog calls touch_softlockup_watchdog(), which might deadlock on rq->lock if the NMI hits an rq-locked critical section. Signed-off-by: Ingo Molnar --- kernel/sched.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index cc6c1192c448..3eed860cf292 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -383,13 +383,12 @@ static inline unsigned long long rq_clock(struct rq *rq) */ unsigned long long cpu_clock(int cpu) { - struct rq *rq = cpu_rq(cpu); unsigned long long now; unsigned long flags; - spin_lock_irqsave(&rq->lock, flags); - now = rq_clock(rq); - spin_unlock_irqrestore(&rq->lock, flags); + local_irq_save(flags); + now = rq_clock(cpu_rq(cpu)); + local_irq_restore(flags); return now; } -- cgit v1.2.2 From f33734619371ae40f34bbce001938408e6634f05 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 26 Jul 2007 13:40:43 +0200 Subject: [PATCH] sched: mark sysrq_sched_debug_show() static Only sched.c uses sysrq_sched_debug_show, and sched.c includes sched_debug.c, so all uses of sysrq_sched_debug_show occur in the same source file. Eliminates a sparse warning: warning: symbol 'sysrq_sched_debug_show' was not declared. Should it be static? Signed-off-by: Josh Triplett Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 29f2c21e7da2..42970f723a97 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -186,7 +186,7 @@ static int sched_debug_show(struct seq_file *m, void *v) return 0; } -void sysrq_sched_debug_show(void) +static void sysrq_sched_debug_show(void) { sched_debug_show(NULL, NULL); } -- cgit v1.2.2 From e692ab53473c93c0d0820618c97aa74a62ab67da Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 26 Jul 2007 13:40:43 +0200 Subject: [PATCH] sched: debug feature - make the sched-domains tree runtime-tweakable debugging feature: make the sched-domains tree runtime-tweakable. Signed-off-by: Andrew Morton [ mingo@elte.hu: made it depend on CONFIG_SCHED_DEBUG & small updates ] Signed-off-by: Ingo Molnar --- kernel/sched.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 3eed860cf292..5c51d7e5dcc1 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include @@ -5202,10 +5203,129 @@ static void migrate_dead_tasks(unsigned int dead_cpu) if (!next) break; migrate_dead(dead_cpu, next); + } } #endif /* CONFIG_HOTPLUG_CPU */ +#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) + +static struct ctl_table sd_ctl_dir[] = { + {CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, }, + {0,}, +}; + +static struct ctl_table sd_ctl_root[] = { + {CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, }, + {0,}, +}; + +static struct ctl_table *sd_alloc_ctl_entry(int n) +{ + struct ctl_table *entry = + kmalloc(n * sizeof(struct ctl_table), GFP_KERNEL); + + BUG_ON(!entry); + memset(entry, 0, n * sizeof(struct ctl_table)); + + return entry; +} + +static void +set_table_entry(struct ctl_table *entry, int ctl_name, + const char *procname, void *data, int maxlen, + mode_t mode, proc_handler *proc_handler) +{ + entry->ctl_name = ctl_name; + entry->procname = procname; + entry->data = data; + entry->maxlen = maxlen; + entry->mode = mode; + entry->proc_handler = proc_handler; +} + +static struct ctl_table * +sd_alloc_ctl_domain_table(struct sched_domain *sd) +{ + struct ctl_table *table = sd_alloc_ctl_entry(14); + + set_table_entry(&table[0], 1, "min_interval", &sd->min_interval, + sizeof(long), 0644, proc_doulongvec_minmax); + set_table_entry(&table[1], 2, "max_interval", &sd->max_interval, + sizeof(long), 0644, proc_doulongvec_minmax); + set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[9], 10, "cache_hot_time", &sd->cache_hot_time, + sizeof(long long), 0644, proc_doulongvec_minmax); + set_table_entry(&table[10], 11, "cache_nice_tries", + &sd->cache_nice_tries, + sizeof(int), 0644, proc_dointvec_minmax); + set_table_entry(&table[12], 13, "flags", &sd->flags, + sizeof(int), 0644, proc_dointvec_minmax); + + return table; +} + +static ctl_table *sd_alloc_ctl_cpu_table(int cpu) +{ + struct ctl_table *entry, *table; + struct sched_domain *sd; + int domain_num = 0, i; + char buf[32]; + + for_each_domain(cpu, sd) + domain_num++; + entry = table = sd_alloc_ctl_entry(domain_num + 1); + + i = 0; + for_each_domain(cpu, sd) { + snprintf(buf, 32, "domain%d", i); + entry->ctl_name = i + 1; + entry->procname = kstrdup(buf, GFP_KERNEL); + entry->mode = 0755; + entry->child = sd_alloc_ctl_domain_table(sd); + entry++; + i++; + } + return table; +} + +static struct ctl_table_header *sd_sysctl_header; +static void init_sched_domain_sysctl(void) +{ + int i, cpu_num = num_online_cpus(); + struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); + char buf[32]; + + sd_ctl_dir[0].child = entry; + + for (i = 0; i < cpu_num; i++, entry++) { + snprintf(buf, 32, "cpu%d", i); + entry->ctl_name = i + 1; + entry->procname = kstrdup(buf, GFP_KERNEL); + entry->mode = 0755; + entry->child = sd_alloc_ctl_cpu_table(i); + } + sd_sysctl_header = register_sysctl_table(sd_ctl_root); +} +#else +static void init_sched_domain_sysctl(void) +{ +} +#endif + /* * migration_call - callback that gets triggered when a CPU is added. * Here we can start up the necessary migration thread for the new CPU. @@ -6311,6 +6431,8 @@ void __init sched_init_smp(void) /* XXX: Theoretical race here - CPU may be hotplugged now */ hotcpu_notifier(update_sched_domains, 0); + init_sched_domain_sysctl(); + /* Move init over to a non-isolated CPU */ if (set_cpus_allowed(current, non_isolated_cpus) < 0) BUG(); -- cgit v1.2.2