aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2010-06-13 12:19:48 -0400
committer Tejun Heo <tj@kernel.org> 2010-06-13 12:19:48 -0400
commit 53c5f5ba42c194cb13dd3083ed425f2c5b1ec439 (patch)
tree 72aea970204f3cbf8bd159a8936b36fc61b55718 /kernel
parent 7e27d6e778cd87b6f2415515d7127eba53fe5d02 (diff)
parent 21aa9af03d06cb1d19a3738e5cf12acff984e69b (diff)
Merge branch 'sched-wq' of ../wq into cmwq-base
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cpu.c 6
-rw-r--r-- kernel/cpuset.c 21
-rw-r--r-- kernel/fork.c 2
-rw-r--r-- kernel/sched.c 205
-rw-r--r-- kernel/workqueue_sched.h 16
5 files changed, 170 insertions, 80 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 97d1b426a4ac..f6e726f18491 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -235,11 +235,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
235 return -EINVAL; 235 return -EINVAL;
236 236
237 cpu_hotplug_begin(); 237 cpu_hotplug_begin();
238 set_cpu_active(cpu, false);
239 err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); 238 err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
240 if (err) { 239 if (err) {
241 set_cpu_active(cpu, true);
242
243 nr_calls--; 240 nr_calls--;
244 __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL); 241 __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
245 printk("%s: attempt to take down CPU %u failed\n", 242 printk("%s: attempt to take down CPU %u failed\n",
@@ -249,7 +246,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
249 246
250 err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); 247 err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
251 if (err) { 248 if (err) {
252 set_cpu_active(cpu, true);
253 /* CPU didn't die: tell everyone. Can't complain. */ 249 /* CPU didn't die: tell everyone. Can't complain. */
254 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); 250 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
255 251
@@ -321,8 +317,6 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
321 goto out_notify; 317 goto out_notify;
322 BUG_ON(!cpu_online(cpu)); 318 BUG_ON(!cpu_online(cpu));
323 319
324 set_cpu_active(cpu, true);
325
326 /* Now call notifier in preparation. */ 320 /* Now call notifier in preparation. */
327 cpu_notify(CPU_ONLINE | mod, hcpu); 321 cpu_notify(CPU_ONLINE | mod, hcpu);
328 322
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 02b9611eadde..05727dcaa80d 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2113,31 +2113,17 @@ static void scan_for_empty_cpusets(struct cpuset *root)
2113 * but making no active use of cpusets. 2113 * but making no active use of cpusets.
2114 * 2114 *
2115 * This routine ensures that top_cpuset.cpus_allowed tracks 2115 * This routine ensures that top_cpuset.cpus_allowed tracks
2116 * cpu_online_map on each CPU hotplug (cpuhp) event. 2116 * cpu_active_mask on each CPU hotplug (cpuhp) event.
2117 * 2117 *
2118 * Called within get_online_cpus(). Needs to call cgroup_lock() 2118 * Called within get_online_cpus(). Needs to call cgroup_lock()
2119 * before calling generate_sched_domains(). 2119 * before calling generate_sched_domains().
2120 */ 2120 */
2121static int cpuset_track_online_cpus(struct notifier_block *unused_nb, 2121void __cpuexit cpuset_update_active_cpus(void)
2122 unsigned long phase, void *unused_cpu)
2123{ 2122{
2124 struct sched_domain_attr *attr; 2123 struct sched_domain_attr *attr;
2125 cpumask_var_t *doms; 2124 cpumask_var_t *doms;
2126 int ndoms; 2125 int ndoms;
2127 2126
2128 switch (phase) {
2129 case CPU_ONLINE:
2130 case CPU_ONLINE_FROZEN:
2131 case CPU_DOWN_PREPARE:
2132 case CPU_DOWN_PREPARE_FROZEN:
2133 case CPU_DOWN_FAILED:
2134 case CPU_DOWN_FAILED_FROZEN:
2135 break;
2136
2137 default:
2138 return NOTIFY_DONE;
2139 }
2140
2141 cgroup_lock(); 2127 cgroup_lock();
2142 mutex_lock(&callback_mutex); 2128 mutex_lock(&callback_mutex);
2143 cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); 2129 cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
@@ -2148,8 +2134,6 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
2148 2134
2149 /* Have scheduler rebuild the domains */ 2135 /* Have scheduler rebuild the domains */
2150 partition_sched_domains(ndoms, doms, attr); 2136 partition_sched_domains(ndoms, doms, attr);
2151
2152 return NOTIFY_OK;
2153} 2137}
2154 2138
2155#ifdef CONFIG_MEMORY_HOTPLUG 2139#ifdef CONFIG_MEMORY_HOTPLUG
@@ -2203,7 +2187,6 @@ void __init cpuset_init_smp(void)
2203 cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask); 2187 cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
2204 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY]; 2188 top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
2205 2189
2206 hotcpu_notifier(cpuset_track_online_cpus, 0);
2207 hotplug_memory_notifier(cpuset_track_online_nodes, 10); 2190 hotplug_memory_notifier(cpuset_track_online_nodes, 10);
2208 2191
2209 cpuset_wq = create_singlethread_workqueue("cpuset"); 2192 cpuset_wq = create_singlethread_workqueue("cpuset");
diff --git a/kernel/fork.c b/kernel/fork.c
index b6cce14ba047..a82a65cef741 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -907,7 +907,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
907{ 907{
908 unsigned long new_flags = p->flags; 908 unsigned long new_flags = p->flags;
909 909
910 new_flags &= ~PF_SUPERPRIV; 910 new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
911 new_flags |= PF_FORKNOEXEC; 911 new_flags |= PF_FORKNOEXEC;
912 new_flags |= PF_STARTING; 912 new_flags |= PF_STARTING;
913 p->flags = new_flags; 913 p->flags = new_flags;
diff --git a/kernel/sched.c b/kernel/sched.c
index f8b8996228dd..edd5a54b95da 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -77,6 +77,7 @@
77#include <asm/irq_regs.h> 77#include <asm/irq_regs.h>
78 78
79#include "sched_cpupri.h" 79#include "sched_cpupri.h"
80#include "workqueue_sched.h"
80 81
81#define CREATE_TRACE_POINTS 82#define CREATE_TRACE_POINTS
82#include <trace/events/sched.h> 83#include <trace/events/sched.h>
@@ -2267,11 +2268,55 @@ static void update_avg(u64 *avg, u64 sample)
2267} 2268}
2268#endif 2269#endif
2269 2270
2270/*** 2271static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
2272 bool is_sync, bool is_migrate, bool is_local,
2273 unsigned long en_flags)
2274{
2275 schedstat_inc(p, se.statistics.nr_wakeups);
2276 if (is_sync)
2277 schedstat_inc(p, se.statistics.nr_wakeups_sync);
2278 if (is_migrate)
2279 schedstat_inc(p, se.statistics.nr_wakeups_migrate);
2280 if (is_local)
2281 schedstat_inc(p, se.statistics.nr_wakeups_local);
2282 else
2283 schedstat_inc(p, se.statistics.nr_wakeups_remote);
2284
2285 activate_task(rq, p, en_flags);
2286}
2287
2288static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
2289 int wake_flags, bool success)
2290{
2291 trace_sched_wakeup(p, success);
2292 check_preempt_curr(rq, p, wake_flags);
2293
2294 p->state = TASK_RUNNING;
2295#ifdef CONFIG_SMP
2296 if (p->sched_class->task_woken)
2297 p->sched_class->task_woken(rq, p);
2298
2299 if (unlikely(rq->idle_stamp)) {
2300 u64 delta = rq->clock - rq->idle_stamp;
2301 u64 max = 2*sysctl_sched_migration_cost;
2302
2303 if (delta > max)
2304 rq->avg_idle = max;
2305 else
2306 update_avg(&rq->avg_idle, delta);
2307 rq->idle_stamp = 0;
2308 }
2309#endif
2310 /* if a worker is waking up, notify workqueue */
2311 if ((p->flags & PF_WQ_WORKER) && success)
2312 wq_worker_waking_up(p, cpu_of(rq));
2313}
2314
2315/**
2271 * try_to_wake_up - wake up a thread 2316 * try_to_wake_up - wake up a thread
2272 * @p: the to-be-woken-up thread 2317 * @p: the thread to be awakened
2273 * @state: the mask of task states that can be woken 2318 * @state: the mask of task states that can be woken
2274 * @sync: do a synchronous wakeup? 2319 * @wake_flags: wake modifier flags (WF_*)
2275 * 2320 *
2276 * Put it on the run-queue if it's not already there. The "current" 2321 * Put it on the run-queue if it's not already there. The "current"
2277 * thread is always on the run-queue (except when the actual 2322 * thread is always on the run-queue (except when the actual
@@ -2279,7 +2324,8 @@ static void update_avg(u64 *avg, u64 sample)
2279 * the simpler "current->state = TASK_RUNNING" to mark yourself 2324 * the simpler "current->state = TASK_RUNNING" to mark yourself
2280 * runnable without the overhead of this. 2325 * runnable without the overhead of this.
2281 * 2326 *
2282 * returns failure only if the task is already active. 2327 * Returns %true if @p was woken up, %false if it was already running
2328 * or @state didn't match @p's state.
2283 */ 2329 */
2284static int try_to_wake_up(struct task_struct *p, unsigned int state, 2330static int try_to_wake_up(struct task_struct *p, unsigned int state,
2285 int wake_flags) 2331 int wake_flags)
@@ -2359,38 +2405,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
2359 2405
2360out_activate: 2406out_activate:
2361#endif /* CONFIG_SMP */ 2407#endif /* CONFIG_SMP */
2362 schedstat_inc(p, se.statistics.nr_wakeups); 2408 ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu,
2363 if (wake_flags & WF_SYNC) 2409 cpu == this_cpu, en_flags);
2364 schedstat_inc(p, se.statistics.nr_wakeups_sync);
2365 if (orig_cpu != cpu)
2366 schedstat_inc(p, se.statistics.nr_wakeups_migrate);
2367 if (cpu == this_cpu)
2368 schedstat_inc(p, se.statistics.nr_wakeups_local);
2369 else
2370 schedstat_inc(p, se.statistics.nr_wakeups_remote);
2371 activate_task(rq, p, en_flags);
2372 success = 1; 2410 success = 1;
2373
2374out_running: 2411out_running:
2375 trace_sched_wakeup(p, success); 2412 ttwu_post_activation(p, rq, wake_flags, success);
2376 check_preempt_curr(rq, p, wake_flags);
2377
2378 p->state = TASK_RUNNING;
2379#ifdef CONFIG_SMP
2380 if (p->sched_class->task_woken)
2381 p->sched_class->task_woken(rq, p);
2382
2383 if (unlikely(rq->idle_stamp)) {
2384 u64 delta = rq->clock - rq->idle_stamp;
2385 u64 max = 2*sysctl_sched_migration_cost;
2386
2387 if (delta > max)
2388 rq->avg_idle = max;
2389 else
2390 update_avg(&rq->avg_idle, delta);
2391 rq->idle_stamp = 0;
2392 }
2393#endif
2394out: 2413out:
2395 task_rq_unlock(rq, &flags); 2414 task_rq_unlock(rq, &flags);
2396 put_cpu(); 2415 put_cpu();
@@ -2399,6 +2418,37 @@ out:
2399} 2418}
2400 2419
2401/** 2420/**
2421 * try_to_wake_up_local - try to wake up a local task with rq lock held
2422 * @p: the thread to be awakened
2423 *
2424 * Put @p on the run-queue if it's not already there. The caller must
2425 * ensure that this_rq() is locked, @p is bound to this_rq() and not
2426 * the current task. this_rq() stays locked over invocation.
2427 */
2428static void try_to_wake_up_local(struct task_struct *p)
2429{
2430 struct rq *rq = task_rq(p);
2431 bool success = false;
2432
2433 BUG_ON(rq != this_rq());
2434 BUG_ON(p == current);
2435 lockdep_assert_held(&rq->lock);
2436
2437 if (!(p->state & TASK_NORMAL))
2438 return;
2439
2440 if (!p->se.on_rq) {
2441 if (likely(!task_running(rq, p))) {
2442 schedstat_inc(rq, ttwu_count);
2443 schedstat_inc(rq, ttwu_local);
2444 }
2445 ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP);
2446 success = true;
2447 }
2448 ttwu_post_activation(p, rq, 0, success);
2449}
2450
2451/**
2402 * wake_up_process - Wake up a specific process 2452 * wake_up_process - Wake up a specific process
2403 * @p: The process to be woken up. 2453 * @p: The process to be woken up.
2404 * 2454 *
@@ -3603,10 +3653,24 @@ need_resched_nonpreemptible:
3603 clear_tsk_need_resched(prev); 3653 clear_tsk_need_resched(prev);
3604 3654
3605 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { 3655 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
3606 if (unlikely(signal_pending_state(prev->state, prev))) 3656 if (unlikely(signal_pending_state(prev->state, prev))) {
3607 prev->state = TASK_RUNNING; 3657 prev->state = TASK_RUNNING;
3608 else 3658 } else {
3659 /*
3660 * If a worker is going to sleep, notify and
3661 * ask workqueue whether it wants to wake up a
3662 * task to maintain concurrency. If so, wake
3663 * up the task.
3664 */
3665 if (prev->flags & PF_WQ_WORKER) {
3666 struct task_struct *to_wakeup;
3667
3668 to_wakeup = wq_worker_sleeping(prev, cpu);
3669 if (to_wakeup)
3670 try_to_wake_up_local(to_wakeup);
3671 }
3609 deactivate_task(rq, prev, DEQUEUE_SLEEP); 3672 deactivate_task(rq, prev, DEQUEUE_SLEEP);
3673 }
3610 switch_count = &prev->nvcsw; 3674 switch_count = &prev->nvcsw;
3611 } 3675 }
3612 3676
@@ -5801,20 +5865,49 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5801 */ 5865 */
5802static struct notifier_block __cpuinitdata migration_notifier = { 5866static struct notifier_block __cpuinitdata migration_notifier = {
5803 .notifier_call = migration_call, 5867 .notifier_call = migration_call,
5804 .priority = 10 5868 .priority = CPU_PRI_MIGRATION,
5805}; 5869};
5806 5870
5871static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
5872 unsigned long action, void *hcpu)
5873{
5874 switch (action & ~CPU_TASKS_FROZEN) {
5875 case CPU_ONLINE:
5876 case CPU_DOWN_FAILED:
5877 set_cpu_active((long)hcpu, true);
5878 return NOTIFY_OK;
5879 default:
5880 return NOTIFY_DONE;
5881 }
5882}
5883
5884static int __cpuinit sched_cpu_inactive(struct notifier_block *nfb,
5885 unsigned long action, void *hcpu)
5886{
5887 switch (action & ~CPU_TASKS_FROZEN) {
5888 case CPU_DOWN_PREPARE:
5889 set_cpu_active((long)hcpu, false);
5890 return NOTIFY_OK;
5891 default:
5892 return NOTIFY_DONE;
5893 }
5894}
5895
5807static int __init migration_init(void) 5896static int __init migration_init(void)
5808{ 5897{
5809 void *cpu = (void *)(long)smp_processor_id(); 5898 void *cpu = (void *)(long)smp_processor_id();
5810 int err; 5899 int err;
5811 5900
5812 /* Start one for the boot CPU: */ 5901 /* Initialize migration for the boot CPU */
5813 err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu); 5902 err = migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
5814 BUG_ON(err == NOTIFY_BAD); 5903 BUG_ON(err == NOTIFY_BAD);
5815 migration_call(&migration_notifier, CPU_ONLINE, cpu); 5904 migration_call(&migration_notifier, CPU_ONLINE, cpu);
5816 register_cpu_notifier(&migration_notifier); 5905 register_cpu_notifier(&migration_notifier);
5817 5906
5907 /* Register cpu active notifiers */
5908 cpu_notifier(sched_cpu_active, CPU_PRI_SCHED_ACTIVE);
5909 cpu_notifier(sched_cpu_inactive, CPU_PRI_SCHED_INACTIVE);
5910
5818 return 0; 5911 return 0;
5819} 5912}
5820early_initcall(migration_init); 5913early_initcall(migration_init);
@@ -7273,29 +7366,35 @@ int __init sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
7273} 7366}
7274#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ 7367#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
7275 7368
7276#ifndef CONFIG_CPUSETS
7277/* 7369/*
7278 * Add online and remove offline CPUs from the scheduler domains. 7370 * Update cpusets according to cpu_active mask. If cpusets are
7279 * When cpusets are enabled they take over this function. 7371 * disabled, cpuset_update_active_cpus() becomes a simple wrapper
7372 * around partition_sched_domains().
7280 */ 7373 */
7281static int update_sched_domains(struct notifier_block *nfb, 7374static int __cpuexit cpuset_cpu_active(struct notifier_block *nfb,
7282 unsigned long action, void *hcpu) 7375 unsigned long action, void *hcpu)
7283{ 7376{
7284 switch (action) { 7377 switch (action & ~CPU_TASKS_FROZEN) {
7285 case CPU_ONLINE: 7378 case CPU_ONLINE:
7286 case CPU_ONLINE_FROZEN:
7287 case CPU_DOWN_PREPARE:
7288 case CPU_DOWN_PREPARE_FROZEN:
7289 case CPU_DOWN_FAILED: 7379 case CPU_DOWN_FAILED:
7290 case CPU_DOWN_FAILED_FROZEN: 7380 cpuset_update_active_cpus();
7291 partition_sched_domains(1, NULL, NULL);
7292 return NOTIFY_OK; 7381 return NOTIFY_OK;
7382 default:
7383 return NOTIFY_DONE;
7384 }
7385}
7293 7386
7387static int __cpuexit cpuset_cpu_inactive(struct notifier_block *nfb,
7388 unsigned long action, void *hcpu)
7389{
7390 switch (action & ~CPU_TASKS_FROZEN) {
7391 case CPU_DOWN_PREPARE:
7392 cpuset_update_active_cpus();
7393 return NOTIFY_OK;
7294 default: 7394 default:
7295 return NOTIFY_DONE; 7395 return NOTIFY_DONE;
7296 } 7396 }
7297} 7397}
7298#endif
7299 7398
7300static int update_runtime(struct notifier_block *nfb, 7399static int update_runtime(struct notifier_block *nfb,
7301 unsigned long action, void *hcpu) 7400 unsigned long action, void *hcpu)
@@ -7341,10 +7440,8 @@ void __init sched_init_smp(void)
7341 mutex_unlock(&sched_domains_mutex); 7440 mutex_unlock(&sched_domains_mutex);
7342 put_online_cpus(); 7441 put_online_cpus();
7343 7442
7344#ifndef CONFIG_CPUSETS 7443 hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
7345 /* XXX: Theoretical race here - CPU may be hotplugged now */ 7444 hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
7346 hotcpu_notifier(update_sched_domains, 0);
7347#endif
7348 7445
7349 /* RT runtime code needs to handle some hotplug events */ 7446 /* RT runtime code needs to handle some hotplug events */
7350 hotcpu_notifier(update_runtime, 0); 7447 hotcpu_notifier(update_runtime, 0);
diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h
new file mode 100644
index 000000000000..af040babb742
--- /dev/null
+++ b/kernel/workqueue_sched.h
@@ -0,0 +1,16 @@
1/*
2 * kernel/workqueue_sched.h
3 *
4 * Scheduler hooks for concurrency managed workqueue. Only to be
5 * included from sched.c and workqueue.c.
6 */
7static inline void wq_worker_waking_up(struct task_struct *task,
8 unsigned int cpu)
9{
10}
11
12static inline struct task_struct *wq_worker_sleeping(struct task_struct *task,
13 unsigned int cpu)
14{
15 return NULL;
16}