aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorJames Morris <jmorris@namei.org>2008-11-13 19:29:12 -0500
committerJames Morris <jmorris@namei.org>2008-11-13 19:29:12 -0500
commit2b828925652340277a889cbc11b2d0637f7cdaf7 (patch)
tree32fcb3d3e466fc419fad2d3717956a5b5ad3d35a /kernel
parent3a3b7ce9336952ea7b9564d976d068a238976c9d (diff)
parent58e20d8d344b0ee083febb18c2b021d2427e56ca (diff)
Merge branch 'master' into next
Conflicts: security/keys/internal.h security/keys/process_keys.c security/keys/request_key.c Fixed conflicts above by using the non 'tsk' versions. Signed-off-by: James Morris <jmorris@namei.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c1
-rw-r--r--kernel/cgroup_freezer.c19
-rw-r--r--kernel/cpu.c3
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/hrtimer.c26
-rw-r--r--kernel/kprobes.c23
-rw-r--r--kernel/sched.c28
-rw-r--r--kernel/sched_debug.c41
-rw-r--r--kernel/sched_fair.c83
-rw-r--r--kernel/sched_features.h1
-rw-r--r--kernel/smp.c18
-rw-r--r--kernel/softirq.c7
-rw-r--r--kernel/time/tick-sched.c4
-rw-r--r--kernel/timer.c129
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.c19
-rw-r--r--kernel/workqueue.c45
17 files changed, 339 insertions, 115 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index a512a75a5560..8fe8c0cb137b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2500,7 +2500,6 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
2500 list_del(&cgrp->sibling); 2500 list_del(&cgrp->sibling);
2501 spin_lock(&cgrp->dentry->d_lock); 2501 spin_lock(&cgrp->dentry->d_lock);
2502 d = dget(cgrp->dentry); 2502 d = dget(cgrp->dentry);
2503 cgrp->dentry = NULL;
2504 spin_unlock(&d->d_lock); 2503 spin_unlock(&d->d_lock);
2505 2504
2506 cgroup_d_remove_dir(d); 2505 cgroup_d_remove_dir(d);
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index 7fa476f01d05..fb249e2bcada 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -184,9 +184,20 @@ static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task)
184{ 184{
185 struct freezer *freezer; 185 struct freezer *freezer;
186 186
187 task_lock(task); 187 /*
188 * No lock is needed, since the task isn't on tasklist yet,
189 * so it can't be moved to another cgroup, which means the
190 * freezer won't be removed and will be valid during this
191 * function call.
192 */
188 freezer = task_freezer(task); 193 freezer = task_freezer(task);
189 task_unlock(task); 194
195 /*
196 * The root cgroup is non-freezable, so we can skip the
197 * following check.
198 */
199 if (!freezer->css.cgroup->parent)
200 return;
190 201
191 spin_lock_irq(&freezer->lock); 202 spin_lock_irq(&freezer->lock);
192 BUG_ON(freezer->state == CGROUP_FROZEN); 203 BUG_ON(freezer->state == CGROUP_FROZEN);
@@ -331,7 +342,7 @@ static int freezer_write(struct cgroup *cgroup,
331 else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0) 342 else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0)
332 goal_state = CGROUP_FROZEN; 343 goal_state = CGROUP_FROZEN;
333 else 344 else
334 return -EIO; 345 return -EINVAL;
335 346
336 if (!cgroup_lock_live_group(cgroup)) 347 if (!cgroup_lock_live_group(cgroup))
337 return -ENODEV; 348 return -ENODEV;
@@ -350,6 +361,8 @@ static struct cftype files[] = {
350 361
351static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup) 362static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
352{ 363{
364 if (!cgroup->parent)
365 return 0;
353 return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files)); 366 return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
354} 367}
355 368
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 86d49045daed..5a732c5ef08b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {
499#endif 499#endif
500}; 500};
501EXPORT_SYMBOL_GPL(cpu_bit_bitmap); 501EXPORT_SYMBOL_GPL(cpu_bit_bitmap);
502
503const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
504EXPORT_SYMBOL(cpu_all_bits);
diff --git a/kernel/exit.c b/kernel/exit.c
index c0711da15486..16eda9b39f8d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -143,6 +143,11 @@ static void __exit_signal(struct task_struct *tsk)
143 if (sig) { 143 if (sig) {
144 flush_sigqueue(&sig->shared_pending); 144 flush_sigqueue(&sig->shared_pending);
145 taskstats_tgid_free(sig); 145 taskstats_tgid_free(sig);
146 /*
147 * Make sure ->signal can't go away under rq->lock,
148 * see account_group_exec_runtime().
149 */
150 task_rq_unlock_wait(tsk);
146 __cleanup_signal(sig); 151 __cleanup_signal(sig);
147 } 152 }
148} 153}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2b465dfde426..47e63349d1b2 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -664,14 +664,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
664 664
665 /* Timer is expired, act upon the callback mode */ 665 /* Timer is expired, act upon the callback mode */
666 switch(timer->cb_mode) { 666 switch(timer->cb_mode) {
667 case HRTIMER_CB_IRQSAFE_NO_RESTART:
668 debug_hrtimer_deactivate(timer);
669 /*
670 * We can call the callback from here. No restart
671 * happens, so no danger of recursion
672 */
673 BUG_ON(timer->function(timer) != HRTIMER_NORESTART);
674 return 1;
675 case HRTIMER_CB_IRQSAFE_PERCPU: 667 case HRTIMER_CB_IRQSAFE_PERCPU:
676 case HRTIMER_CB_IRQSAFE_UNLOCKED: 668 case HRTIMER_CB_IRQSAFE_UNLOCKED:
677 /* 669 /*
@@ -683,7 +675,6 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
683 */ 675 */
684 debug_hrtimer_deactivate(timer); 676 debug_hrtimer_deactivate(timer);
685 return 1; 677 return 1;
686 case HRTIMER_CB_IRQSAFE:
687 case HRTIMER_CB_SOFTIRQ: 678 case HRTIMER_CB_SOFTIRQ:
688 /* 679 /*
689 * Move everything else into the softirq pending list ! 680 * Move everything else into the softirq pending list !
@@ -1209,6 +1200,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
1209 enum hrtimer_restart (*fn)(struct hrtimer *); 1200 enum hrtimer_restart (*fn)(struct hrtimer *);
1210 struct hrtimer *timer; 1201 struct hrtimer *timer;
1211 int restart; 1202 int restart;
1203 int emulate_hardirq_ctx = 0;
1212 1204
1213 timer = list_entry(cpu_base->cb_pending.next, 1205 timer = list_entry(cpu_base->cb_pending.next,
1214 struct hrtimer, cb_entry); 1206 struct hrtimer, cb_entry);
@@ -1217,10 +1209,24 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base)
1217 timer_stats_account_hrtimer(timer); 1209 timer_stats_account_hrtimer(timer);
1218 1210
1219 fn = timer->function; 1211 fn = timer->function;
1212 /*
1213 * A timer might have been added to the cb_pending list
1214 * when it was migrated during a cpu-offline operation.
1215 * Emulate hardirq context for such timers.
1216 */
1217 if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU ||
1218 timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED)
1219 emulate_hardirq_ctx = 1;
1220
1220 __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); 1221 __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0);
1221 spin_unlock_irq(&cpu_base->lock); 1222 spin_unlock_irq(&cpu_base->lock);
1222 1223
1223 restart = fn(timer); 1224 if (unlikely(emulate_hardirq_ctx)) {
1225 local_irq_disable();
1226 restart = fn(timer);
1227 local_irq_enable();
1228 } else
1229 restart = fn(timer);
1224 1230
1225 spin_lock_irq(&cpu_base->lock); 1231 spin_lock_irq(&cpu_base->lock);
1226 1232
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 8b57a2597f21..9f8a3f25259a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -72,7 +72,7 @@ static bool kprobe_enabled;
72DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ 72DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 73static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
74static struct { 74static struct {
75 spinlock_t lock ____cacheline_aligned; 75 spinlock_t lock ____cacheline_aligned_in_smp;
76} kretprobe_table_locks[KPROBE_TABLE_SIZE]; 76} kretprobe_table_locks[KPROBE_TABLE_SIZE];
77 77
78static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) 78static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash)
@@ -613,30 +613,37 @@ static int __kprobes __register_kprobe(struct kprobe *p,
613 return -EINVAL; 613 return -EINVAL;
614 p->addr = addr; 614 p->addr = addr;
615 615
616 if (!kernel_text_address((unsigned long) p->addr) || 616 preempt_disable();
617 in_kprobes_functions((unsigned long) p->addr)) 617 if (!__kernel_text_address((unsigned long) p->addr) ||
618 in_kprobes_functions((unsigned long) p->addr)) {
619 preempt_enable();
618 return -EINVAL; 620 return -EINVAL;
621 }
619 622
620 p->mod_refcounted = 0; 623 p->mod_refcounted = 0;
621 624
622 /* 625 /*
623 * Check if are we probing a module. 626 * Check if are we probing a module.
624 */ 627 */
625 probed_mod = module_text_address((unsigned long) p->addr); 628 probed_mod = __module_text_address((unsigned long) p->addr);
626 if (probed_mod) { 629 if (probed_mod) {
627 struct module *calling_mod = module_text_address(called_from); 630 struct module *calling_mod;
631 calling_mod = __module_text_address(called_from);
628 /* 632 /*
629 * We must allow modules to probe themself and in this case 633 * We must allow modules to probe themself and in this case
630 * avoid incrementing the module refcount, so as to allow 634 * avoid incrementing the module refcount, so as to allow
631 * unloading of self probing modules. 635 * unloading of self probing modules.
632 */ 636 */
633 if (calling_mod && calling_mod != probed_mod) { 637 if (calling_mod && calling_mod != probed_mod) {
634 if (unlikely(!try_module_get(probed_mod))) 638 if (unlikely(!try_module_get(probed_mod))) {
639 preempt_enable();
635 return -EINVAL; 640 return -EINVAL;
641 }
636 p->mod_refcounted = 1; 642 p->mod_refcounted = 1;
637 } else 643 } else
638 probed_mod = NULL; 644 probed_mod = NULL;
639 } 645 }
646 preempt_enable();
640 647
641 p->nmissed = 0; 648 p->nmissed = 0;
642 INIT_LIST_HEAD(&p->list); 649 INIT_LIST_HEAD(&p->list);
@@ -718,6 +725,10 @@ static void __kprobes __unregister_kprobe_bottom(struct kprobe *p)
718 struct kprobe *old_p; 725 struct kprobe *old_p;
719 726
720 if (p->mod_refcounted) { 727 if (p->mod_refcounted) {
728 /*
729 * Since we've already incremented refcount,
730 * we don't need to disable preemption.
731 */
721 mod = module_text_address((unsigned long)p->addr); 732 mod = module_text_address((unsigned long)p->addr);
722 if (mod) 733 if (mod)
723 module_put(mod); 734 module_put(mod);
diff --git a/kernel/sched.c b/kernel/sched.c
index 92992e287b10..204d0662b438 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -399,9 +399,9 @@ struct cfs_rq {
399 * 'curr' points to currently running entity on this cfs_rq. 399 * 'curr' points to currently running entity on this cfs_rq.
400 * It is set to NULL otherwise (i.e when none are currently running). 400 * It is set to NULL otherwise (i.e when none are currently running).
401 */ 401 */
402 struct sched_entity *curr, *next; 402 struct sched_entity *curr, *next, *last;
403 403
404 unsigned long nr_spread_over; 404 unsigned int nr_spread_over;
405 405
406#ifdef CONFIG_FAIR_GROUP_SCHED 406#ifdef CONFIG_FAIR_GROUP_SCHED
407 struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ 407 struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */
@@ -971,6 +971,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
971 } 971 }
972} 972}
973 973
974void task_rq_unlock_wait(struct task_struct *p)
975{
976 struct rq *rq = task_rq(p);
977
978 smp_mb(); /* spin-unlock-wait is not a full memory barrier */
979 spin_unlock_wait(&rq->lock);
980}
981
974static void __task_rq_unlock(struct rq *rq) 982static void __task_rq_unlock(struct rq *rq)
975 __releases(rq->lock) 983 __releases(rq->lock)
976{ 984{
@@ -1450,6 +1458,8 @@ static unsigned long cpu_avg_load_per_task(int cpu)
1450 1458
1451 if (rq->nr_running) 1459 if (rq->nr_running)
1452 rq->avg_load_per_task = rq->load.weight / rq->nr_running; 1460 rq->avg_load_per_task = rq->load.weight / rq->nr_running;
1461 else
1462 rq->avg_load_per_task = 0;
1453 1463
1454 return rq->avg_load_per_task; 1464 return rq->avg_load_per_task;
1455} 1465}
@@ -1807,7 +1817,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
1807 /* 1817 /*
1808 * Buddy candidates are cache hot: 1818 * Buddy candidates are cache hot:
1809 */ 1819 */
1810 if (sched_feat(CACHE_HOT_BUDDY) && (&p->se == cfs_rq_of(&p->se)->next)) 1820 if (sched_feat(CACHE_HOT_BUDDY) &&
1821 (&p->se == cfs_rq_of(&p->se)->next ||
1822 &p->se == cfs_rq_of(&p->se)->last))
1811 return 1; 1823 return 1;
1812 1824
1813 if (p->sched_class != &fair_sched_class) 1825 if (p->sched_class != &fair_sched_class)
@@ -5874,6 +5886,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5874 struct rq *rq = cpu_rq(cpu); 5886 struct rq *rq = cpu_rq(cpu);
5875 unsigned long flags; 5887 unsigned long flags;
5876 5888
5889 spin_lock_irqsave(&rq->lock, flags);
5890
5877 __sched_fork(idle); 5891 __sched_fork(idle);
5878 idle->se.exec_start = sched_clock(); 5892 idle->se.exec_start = sched_clock();
5879 5893
@@ -5881,7 +5895,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5881 idle->cpus_allowed = cpumask_of_cpu(cpu); 5895 idle->cpus_allowed = cpumask_of_cpu(cpu);
5882 __set_task_cpu(idle, cpu); 5896 __set_task_cpu(idle, cpu);
5883 5897
5884 spin_lock_irqsave(&rq->lock, flags);
5885 rq->curr = rq->idle = idle; 5898 rq->curr = rq->idle = idle;
5886#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) 5899#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
5887 idle->oncpu = 1; 5900 idle->oncpu = 1;
@@ -6891,15 +6904,17 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
6891 struct sched_domain *tmp; 6904 struct sched_domain *tmp;
6892 6905
6893 /* Remove the sched domains which do not contribute to scheduling. */ 6906 /* Remove the sched domains which do not contribute to scheduling. */
6894 for (tmp = sd; tmp; tmp = tmp->parent) { 6907 for (tmp = sd; tmp; ) {
6895 struct sched_domain *parent = tmp->parent; 6908 struct sched_domain *parent = tmp->parent;
6896 if (!parent) 6909 if (!parent)
6897 break; 6910 break;
6911
6898 if (sd_parent_degenerate(tmp, parent)) { 6912 if (sd_parent_degenerate(tmp, parent)) {
6899 tmp->parent = parent->parent; 6913 tmp->parent = parent->parent;
6900 if (parent->parent) 6914 if (parent->parent)
6901 parent->parent->child = tmp; 6915 parent->parent->child = tmp;
6902 } 6916 } else
6917 tmp = tmp->parent;
6903 } 6918 }
6904 6919
6905 if (sd && sd_degenerate(sd)) { 6920 if (sd && sd_degenerate(sd)) {
@@ -7688,6 +7703,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7688error: 7703error:
7689 free_sched_groups(cpu_map, tmpmask); 7704 free_sched_groups(cpu_map, tmpmask);
7690 SCHED_CPUMASK_FREE((void *)allmasks); 7705 SCHED_CPUMASK_FREE((void *)allmasks);
7706 kfree(rd);
7691 return -ENOMEM; 7707 return -ENOMEM;
7692#endif 7708#endif
7693} 7709}
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 5ae17762ec32..48ecc51e7701 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -144,7 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
144 last = __pick_last_entity(cfs_rq); 144 last = __pick_last_entity(cfs_rq);
145 if (last) 145 if (last)
146 max_vruntime = last->vruntime; 146 max_vruntime = last->vruntime;
147 min_vruntime = rq->cfs.min_vruntime; 147 min_vruntime = cfs_rq->min_vruntime;
148 rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime; 148 rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime;
149 spin_unlock_irqrestore(&rq->lock, flags); 149 spin_unlock_irqrestore(&rq->lock, flags);
150 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", 150 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime",
@@ -161,26 +161,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
161 SPLIT_NS(spread0)); 161 SPLIT_NS(spread0));
162 SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); 162 SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running);
163 SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); 163 SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
164#ifdef CONFIG_SCHEDSTATS
165#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
166
167 P(yld_exp_empty);
168 P(yld_act_empty);
169 P(yld_both_empty);
170 P(yld_count);
171 164
172 P(sched_switch); 165 SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over",
173 P(sched_count);
174 P(sched_goidle);
175
176 P(ttwu_count);
177 P(ttwu_local);
178
179 P(bkl_count);
180
181#undef P
182#endif
183 SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over",
184 cfs_rq->nr_spread_over); 166 cfs_rq->nr_spread_over);
185#ifdef CONFIG_FAIR_GROUP_SCHED 167#ifdef CONFIG_FAIR_GROUP_SCHED
186#ifdef CONFIG_SMP 168#ifdef CONFIG_SMP
@@ -260,6 +242,25 @@ static void print_cpu(struct seq_file *m, int cpu)
260#undef P 242#undef P
261#undef PN 243#undef PN
262 244
245#ifdef CONFIG_SCHEDSTATS
246#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n);
247
248 P(yld_exp_empty);
249 P(yld_act_empty);
250 P(yld_both_empty);
251 P(yld_count);
252
253 P(sched_switch);
254 P(sched_count);
255 P(sched_goidle);
256
257 P(ttwu_count);
258 P(ttwu_local);
259
260 P(bkl_count);
261
262#undef P
263#endif
263 print_cfs_stats(m, cpu); 264 print_cfs_stats(m, cpu);
264 print_rt_stats(m, cpu); 265 print_rt_stats(m, cpu);
265 266
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ce514afd78ff..98345e45b059 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -341,23 +341,20 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
341 cfs_rq->rb_leftmost = next_node; 341 cfs_rq->rb_leftmost = next_node;
342 } 342 }
343 343
344 if (cfs_rq->next == se)
345 cfs_rq->next = NULL;
346
347 rb_erase(&se->run_node, &cfs_rq->tasks_timeline); 344 rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
348} 345}
349 346
350static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
351{
352 return cfs_rq->rb_leftmost;
353}
354
355static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) 347static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
356{ 348{
357 return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node); 349 struct rb_node *left = cfs_rq->rb_leftmost;
350
351 if (!left)
352 return NULL;
353
354 return rb_entry(left, struct sched_entity, run_node);
358} 355}
359 356
360static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) 357static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
361{ 358{
362 struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); 359 struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
363 360
@@ -719,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
719 __enqueue_entity(cfs_rq, se); 716 __enqueue_entity(cfs_rq, se);
720} 717}
721 718
719static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
720{
721 if (cfs_rq->last == se)
722 cfs_rq->last = NULL;
723
724 if (cfs_rq->next == se)
725 cfs_rq->next = NULL;
726}
727
722static void 728static void
723dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) 729dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
724{ 730{
@@ -741,6 +747,8 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
741#endif 747#endif
742 } 748 }
743 749
750 clear_buddies(cfs_rq, se);
751
744 if (se != cfs_rq->curr) 752 if (se != cfs_rq->curr)
745 __dequeue_entity(cfs_rq, se); 753 __dequeue_entity(cfs_rq, se);
746 account_entity_dequeue(cfs_rq, se); 754 account_entity_dequeue(cfs_rq, se);
@@ -794,24 +802,15 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
794static int 802static int
795wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); 803wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
796 804
797static struct sched_entity *
798pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
799{
800 if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1)
801 return se;
802
803 return cfs_rq->next;
804}
805
806static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) 805static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
807{ 806{
808 struct sched_entity *se = NULL; 807 struct sched_entity *se = __pick_next_entity(cfs_rq);
809 808
810 if (first_fair(cfs_rq)) { 809 if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
811 se = __pick_next_entity(cfs_rq); 810 return cfs_rq->next;
812 se = pick_next(cfs_rq, se); 811
813 set_next_entity(cfs_rq, se); 812 if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
814 } 813 return cfs_rq->last;
815 814
816 return se; 815 return se;
817} 816}
@@ -983,6 +982,8 @@ static void yield_task_fair(struct rq *rq)
983 if (unlikely(cfs_rq->nr_running == 1)) 982 if (unlikely(cfs_rq->nr_running == 1))
984 return; 983 return;
985 984
985 clear_buddies(cfs_rq, se);
986
986 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { 987 if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
987 update_rq_clock(rq); 988 update_rq_clock(rq);
988 /* 989 /*
@@ -1325,26 +1326,53 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
1325 return 0; 1326 return 0;
1326} 1327}
1327 1328
1329static void set_last_buddy(struct sched_entity *se)
1330{
1331 for_each_sched_entity(se)
1332 cfs_rq_of(se)->last = se;
1333}
1334
1335static void set_next_buddy(struct sched_entity *se)
1336{
1337 for_each_sched_entity(se)
1338 cfs_rq_of(se)->next = se;
1339}
1340
1328/* 1341/*
1329 * Preempt the current task with a newly woken task if needed: 1342 * Preempt the current task with a newly woken task if needed:
1330 */ 1343 */
1331static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) 1344static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1332{ 1345{
1333 struct task_struct *curr = rq->curr; 1346 struct task_struct *curr = rq->curr;
1334 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1335 struct sched_entity *se = &curr->se, *pse = &p->se; 1347 struct sched_entity *se = &curr->se, *pse = &p->se;
1336 1348
1337 if (unlikely(rt_prio(p->prio))) { 1349 if (unlikely(rt_prio(p->prio))) {
1350 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1351
1338 update_rq_clock(rq); 1352 update_rq_clock(rq);
1339 update_curr(cfs_rq); 1353 update_curr(cfs_rq);
1340 resched_task(curr); 1354 resched_task(curr);
1341 return; 1355 return;
1342 } 1356 }
1343 1357
1358 if (unlikely(p->sched_class != &fair_sched_class))
1359 return;
1360
1344 if (unlikely(se == pse)) 1361 if (unlikely(se == pse))
1345 return; 1362 return;
1346 1363
1347 cfs_rq_of(pse)->next = pse; 1364 /*
1365 * Only set the backward buddy when the current task is still on the
1366 * rq. This can happen when a wakeup gets interleaved with schedule on
1367 * the ->pre_schedule() or idle_balance() point, either of which can
1368 * drop the rq lock.
1369 *
1370 * Also, during early boot the idle thread is in the fair class, for
1371 * obvious reasons its a bad idea to schedule back to the idle thread.
1372 */
1373 if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
1374 set_last_buddy(se);
1375 set_next_buddy(pse);
1348 1376
1349 /* 1377 /*
1350 * We can come here with TIF_NEED_RESCHED already set from new task 1378 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1396,6 +1424,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
1396 1424
1397 do { 1425 do {
1398 se = pick_next_entity(cfs_rq); 1426 se = pick_next_entity(cfs_rq);
1427 set_next_entity(cfs_rq, se);
1399 cfs_rq = group_cfs_rq(se); 1428 cfs_rq = group_cfs_rq(se);
1400 } while (cfs_rq); 1429 } while (cfs_rq);
1401 1430
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index fda016218296..da5d93b5d2c6 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -12,3 +12,4 @@ SCHED_FEAT(LB_BIAS, 1)
12SCHED_FEAT(LB_WAKEUP_UPDATE, 1) 12SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
13SCHED_FEAT(ASYM_EFF_LOAD, 1) 13SCHED_FEAT(ASYM_EFF_LOAD, 1)
14SCHED_FEAT(WAKEUP_OVERLAP, 0) 14SCHED_FEAT(WAKEUP_OVERLAP, 0)
15SCHED_FEAT(LAST_BUDDY, 1)
diff --git a/kernel/smp.c b/kernel/smp.c
index f362a8553777..75c8dde58c55 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -51,10 +51,6 @@ static void csd_flag_wait(struct call_single_data *data)
51{ 51{
52 /* Wait for response */ 52 /* Wait for response */
53 do { 53 do {
54 /*
55 * We need to see the flags store in the IPI handler
56 */
57 smp_mb();
58 if (!(data->flags & CSD_FLAG_WAIT)) 54 if (!(data->flags & CSD_FLAG_WAIT))
59 break; 55 break;
60 cpu_relax(); 56 cpu_relax();
@@ -76,6 +72,11 @@ static void generic_exec_single(int cpu, struct call_single_data *data)
76 list_add_tail(&data->list, &dst->list); 72 list_add_tail(&data->list, &dst->list);
77 spin_unlock_irqrestore(&dst->lock, flags); 73 spin_unlock_irqrestore(&dst->lock, flags);
78 74
75 /*
76 * Make the list addition visible before sending the ipi.
77 */
78 smp_mb();
79
79 if (ipi) 80 if (ipi)
80 arch_send_call_function_single_ipi(cpu); 81 arch_send_call_function_single_ipi(cpu);
81 82
@@ -157,7 +158,7 @@ void generic_smp_call_function_single_interrupt(void)
157 * Need to see other stores to list head for checking whether 158 * Need to see other stores to list head for checking whether
158 * list is empty without holding q->lock 159 * list is empty without holding q->lock
159 */ 160 */
160 smp_mb(); 161 smp_read_barrier_depends();
161 while (!list_empty(&q->list)) { 162 while (!list_empty(&q->list)) {
162 unsigned int data_flags; 163 unsigned int data_flags;
163 164
@@ -191,7 +192,7 @@ void generic_smp_call_function_single_interrupt(void)
191 /* 192 /*
192 * See comment on outer loop 193 * See comment on outer loop
193 */ 194 */
194 smp_mb(); 195 smp_read_barrier_depends();
195 } 196 }
196} 197}
197 198
@@ -370,6 +371,11 @@ int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
370 list_add_tail_rcu(&data->csd.list, &call_function_queue); 371 list_add_tail_rcu(&data->csd.list, &call_function_queue);
371 spin_unlock_irqrestore(&call_function_lock, flags); 372 spin_unlock_irqrestore(&call_function_lock, flags);
372 373
374 /*
375 * Make the list addition visible before sending the ipi.
376 */
377 smp_mb();
378
373 /* Send a message to all CPUs in the map */ 379 /* Send a message to all CPUs in the map */
374 arch_send_call_function_ipi(mask); 380 arch_send_call_function_ipi(mask);
375 381
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7110daeb9a90..e7c69a720d69 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -269,10 +269,11 @@ void irq_enter(void)
269{ 269{
270 int cpu = smp_processor_id(); 270 int cpu = smp_processor_id();
271 271
272 if (idle_cpu(cpu) && !in_interrupt()) 272 if (idle_cpu(cpu) && !in_interrupt()) {
273 __irq_enter();
273 tick_check_idle(cpu); 274 tick_check_idle(cpu);
274 275 } else
275 __irq_enter(); 276 __irq_enter();
276} 277}
277 278
278#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED 279#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5bbb1044f847..342fc9ccab46 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -568,6 +568,9 @@ static void tick_nohz_switch_to_nohz(void)
568 */ 568 */
569static void tick_nohz_kick_tick(int cpu) 569static void tick_nohz_kick_tick(int cpu)
570{ 570{
571#if 0
572 /* Switch back to 2.6.27 behaviour */
573
571 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 574 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
572 ktime_t delta, now; 575 ktime_t delta, now;
573 576
@@ -584,6 +587,7 @@ static void tick_nohz_kick_tick(int cpu)
584 return; 587 return;
585 588
586 tick_nohz_restart(ts, now); 589 tick_nohz_restart(ts, now);
590#endif
587} 591}
588 592
589#else 593#else
diff --git a/kernel/timer.c b/kernel/timer.c
index b54e4646cee7..566257d1dc10 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -112,27 +112,8 @@ timer_set_base(struct timer_list *timer, struct tvec_base *new_base)
112 tbase_get_deferrable(timer->base)); 112 tbase_get_deferrable(timer->base));
113} 113}
114 114
115/** 115static unsigned long round_jiffies_common(unsigned long j, int cpu,
116 * __round_jiffies - function to round jiffies to a full second 116 bool force_up)
117 * @j: the time in (absolute) jiffies that should be rounded
118 * @cpu: the processor number on which the timeout will happen
119 *
120 * __round_jiffies() rounds an absolute time in the future (in jiffies)
121 * up or down to (approximately) full seconds. This is useful for timers
122 * for which the exact time they fire does not matter too much, as long as
123 * they fire approximately every X seconds.
124 *
125 * By rounding these timers to whole seconds, all such timers will fire
126 * at the same time, rather than at various times spread out. The goal
127 * of this is to have the CPU wake up less, which saves power.
128 *
129 * The exact rounding is skewed for each processor to avoid all
130 * processors firing at the exact same time, which could lead
131 * to lock contention or spurious cache line bouncing.
132 *
133 * The return value is the rounded version of the @j parameter.
134 */
135unsigned long __round_jiffies(unsigned long j, int cpu)
136{ 117{
137 int rem; 118 int rem;
138 unsigned long original = j; 119 unsigned long original = j;
@@ -154,8 +135,9 @@ unsigned long __round_jiffies(unsigned long j, int cpu)
154 * due to delays of the timer irq, long irq off times etc etc) then 135 * due to delays of the timer irq, long irq off times etc etc) then
155 * we should round down to the whole second, not up. Use 1/4th second 136 * we should round down to the whole second, not up. Use 1/4th second
156 * as cutoff for this rounding as an extreme upper bound for this. 137 * as cutoff for this rounding as an extreme upper bound for this.
138 * But never round down if @force_up is set.
157 */ 139 */
158 if (rem < HZ/4) /* round down */ 140 if (rem < HZ/4 && !force_up) /* round down */
159 j = j - rem; 141 j = j - rem;
160 else /* round up */ 142 else /* round up */
161 j = j - rem + HZ; 143 j = j - rem + HZ;
@@ -167,6 +149,31 @@ unsigned long __round_jiffies(unsigned long j, int cpu)
167 return original; 149 return original;
168 return j; 150 return j;
169} 151}
152
153/**
154 * __round_jiffies - function to round jiffies to a full second
155 * @j: the time in (absolute) jiffies that should be rounded
156 * @cpu: the processor number on which the timeout will happen
157 *
158 * __round_jiffies() rounds an absolute time in the future (in jiffies)
159 * up or down to (approximately) full seconds. This is useful for timers
160 * for which the exact time they fire does not matter too much, as long as
161 * they fire approximately every X seconds.
162 *
163 * By rounding these timers to whole seconds, all such timers will fire
164 * at the same time, rather than at various times spread out. The goal
165 * of this is to have the CPU wake up less, which saves power.
166 *
167 * The exact rounding is skewed for each processor to avoid all
168 * processors firing at the exact same time, which could lead
169 * to lock contention or spurious cache line bouncing.
170 *
171 * The return value is the rounded version of the @j parameter.
172 */
173unsigned long __round_jiffies(unsigned long j, int cpu)
174{
175 return round_jiffies_common(j, cpu, false);
176}
170EXPORT_SYMBOL_GPL(__round_jiffies); 177EXPORT_SYMBOL_GPL(__round_jiffies);
171 178
172/** 179/**
@@ -191,13 +198,10 @@ EXPORT_SYMBOL_GPL(__round_jiffies);
191 */ 198 */
192unsigned long __round_jiffies_relative(unsigned long j, int cpu) 199unsigned long __round_jiffies_relative(unsigned long j, int cpu)
193{ 200{
194 /* 201 unsigned long j0 = jiffies;
195 * In theory the following code can skip a jiffy in case jiffies 202
196 * increments right between the addition and the later subtraction. 203 /* Use j0 because jiffies might change while we run */
197 * However since the entire point of this function is to use approximate 204 return round_jiffies_common(j + j0, cpu, false) - j0;
198 * timeouts, it's entirely ok to not handle that.
199 */
200 return __round_jiffies(j + jiffies, cpu) - jiffies;
201} 205}
202EXPORT_SYMBOL_GPL(__round_jiffies_relative); 206EXPORT_SYMBOL_GPL(__round_jiffies_relative);
203 207
@@ -218,7 +222,7 @@ EXPORT_SYMBOL_GPL(__round_jiffies_relative);
218 */ 222 */
219unsigned long round_jiffies(unsigned long j) 223unsigned long round_jiffies(unsigned long j)
220{ 224{
221 return __round_jiffies(j, raw_smp_processor_id()); 225 return round_jiffies_common(j, raw_smp_processor_id(), false);
222} 226}
223EXPORT_SYMBOL_GPL(round_jiffies); 227EXPORT_SYMBOL_GPL(round_jiffies);
224 228
@@ -243,6 +247,71 @@ unsigned long round_jiffies_relative(unsigned long j)
243} 247}
244EXPORT_SYMBOL_GPL(round_jiffies_relative); 248EXPORT_SYMBOL_GPL(round_jiffies_relative);
245 249
250/**
251 * __round_jiffies_up - function to round jiffies up to a full second
252 * @j: the time in (absolute) jiffies that should be rounded
253 * @cpu: the processor number on which the timeout will happen
254 *
255 * This is the same as __round_jiffies() except that it will never
256 * round down. This is useful for timeouts for which the exact time
257 * of firing does not matter too much, as long as they don't fire too
258 * early.
259 */
260unsigned long __round_jiffies_up(unsigned long j, int cpu)
261{
262 return round_jiffies_common(j, cpu, true);
263}
264EXPORT_SYMBOL_GPL(__round_jiffies_up);
265
266/**
267 * __round_jiffies_up_relative - function to round jiffies up to a full second
268 * @j: the time in (relative) jiffies that should be rounded
269 * @cpu: the processor number on which the timeout will happen
270 *
271 * This is the same as __round_jiffies_relative() except that it will never
272 * round down. This is useful for timeouts for which the exact time
273 * of firing does not matter too much, as long as they don't fire too
274 * early.
275 */
276unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
277{
278 unsigned long j0 = jiffies;
279
280 /* Use j0 because jiffies might change while we run */
281 return round_jiffies_common(j + j0, cpu, true) - j0;
282}
283EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
284
285/**
286 * round_jiffies_up - function to round jiffies up to a full second
287 * @j: the time in (absolute) jiffies that should be rounded
288 *
289 * This is the same as round_jiffies() except that it will never
290 * round down. This is useful for timeouts for which the exact time
291 * of firing does not matter too much, as long as they don't fire too
292 * early.
293 */
294unsigned long round_jiffies_up(unsigned long j)
295{
296 return round_jiffies_common(j, raw_smp_processor_id(), true);
297}
298EXPORT_SYMBOL_GPL(round_jiffies_up);
299
300/**
301 * round_jiffies_up_relative - function to round jiffies up to a full second
302 * @j: the time in (relative) jiffies that should be rounded
303 *
304 * This is the same as round_jiffies_relative() except that it will never
305 * round down. This is useful for timeouts for which the exact time
306 * of firing does not matter too much, as long as they don't fire too
307 * early.
308 */
309unsigned long round_jiffies_up_relative(unsigned long j)
310{
311 return __round_jiffies_up_relative(j, raw_smp_processor_id());
312}
313EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
314
246 315
247static inline void set_running_timer(struct tvec_base *base, 316static inline void set_running_timer(struct tvec_base *base,
248 struct timer_list *timer) 317 struct timer_list *timer)
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3f3380638646..2f76193c3489 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1060,7 +1060,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1060 1060
1061 /* Did the write stamp get updated already? */ 1061 /* Did the write stamp get updated already? */
1062 if (unlikely(ts < cpu_buffer->write_stamp)) 1062 if (unlikely(ts < cpu_buffer->write_stamp))
1063 goto again; 1063 delta = 0;
1064 1064
1065 if (test_time_stamp(delta)) { 1065 if (test_time_stamp(delta)) {
1066 1066
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5c97c5b4ea8f..ffe7c96fa09b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1755,7 +1755,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
1755 return TRACE_TYPE_HANDLED; 1755 return TRACE_TYPE_HANDLED;
1756 1756
1757 SEQ_PUT_FIELD_RET(s, entry->pid); 1757 SEQ_PUT_FIELD_RET(s, entry->pid);
1758 SEQ_PUT_FIELD_RET(s, iter->cpu); 1758 SEQ_PUT_FIELD_RET(s, entry->cpu);
1759 SEQ_PUT_FIELD_RET(s, iter->ts); 1759 SEQ_PUT_FIELD_RET(s, iter->ts);
1760 1760
1761 switch (entry->type) { 1761 switch (entry->type) {
@@ -2676,7 +2676,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2676{ 2676{
2677 unsigned long val; 2677 unsigned long val;
2678 char buf[64]; 2678 char buf[64];
2679 int ret; 2679 int ret, cpu;
2680 struct trace_array *tr = filp->private_data; 2680 struct trace_array *tr = filp->private_data;
2681 2681
2682 if (cnt >= sizeof(buf)) 2682 if (cnt >= sizeof(buf))
@@ -2704,6 +2704,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2704 goto out; 2704 goto out;
2705 } 2705 }
2706 2706
2707 /* disable all cpu buffers */
2708 for_each_tracing_cpu(cpu) {
2709 if (global_trace.data[cpu])
2710 atomic_inc(&global_trace.data[cpu]->disabled);
2711 if (max_tr.data[cpu])
2712 atomic_inc(&max_tr.data[cpu]->disabled);
2713 }
2714
2707 if (val != global_trace.entries) { 2715 if (val != global_trace.entries) {
2708 ret = ring_buffer_resize(global_trace.buffer, val); 2716 ret = ring_buffer_resize(global_trace.buffer, val);
2709 if (ret < 0) { 2717 if (ret < 0) {
@@ -2735,6 +2743,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
2735 if (tracing_disabled) 2743 if (tracing_disabled)
2736 cnt = -ENOMEM; 2744 cnt = -ENOMEM;
2737 out: 2745 out:
2746 for_each_tracing_cpu(cpu) {
2747 if (global_trace.data[cpu])
2748 atomic_dec(&global_trace.data[cpu]->disabled);
2749 if (max_tr.data[cpu])
2750 atomic_dec(&max_tr.data[cpu]->disabled);
2751 }
2752
2738 max_tr.entries = global_trace.entries; 2753 max_tr.entries = global_trace.entries;
2739 mutex_unlock(&trace_types_lock); 2754 mutex_unlock(&trace_types_lock);
2740 2755
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f12ab5c4dec4..4952322cba45 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -970,6 +970,51 @@ undo:
970 return ret; 970 return ret;
971} 971}
972 972
973#ifdef CONFIG_SMP
974struct work_for_cpu {
975 struct work_struct work;
976 long (*fn)(void *);
977 void *arg;
978 long ret;
979};
980
981static void do_work_for_cpu(struct work_struct *w)
982{
983 struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
984
985 wfc->ret = wfc->fn(wfc->arg);
986}
987
988/**
989 * work_on_cpu - run a function in user context on a particular cpu
990 * @cpu: the cpu to run on
991 * @fn: the function to run
992 * @arg: the function arg
993 *
994 * This will return -EINVAL in the cpu is not online, or the return value
995 * of @fn otherwise.
996 */
997long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
998{
999 struct work_for_cpu wfc;
1000
1001 INIT_WORK(&wfc.work, do_work_for_cpu);
1002 wfc.fn = fn;
1003 wfc.arg = arg;
1004 get_online_cpus();
1005 if (unlikely(!cpu_online(cpu)))
1006 wfc.ret = -EINVAL;
1007 else {
1008 schedule_work_on(cpu, &wfc.work);
1009 flush_work(&wfc.work);
1010 }
1011 put_online_cpus();
1012
1013 return wfc.ret;
1014}
1015EXPORT_SYMBOL_GPL(work_on_cpu);
1016#endif /* CONFIG_SMP */
1017
973void __init init_workqueues(void) 1018void __init init_workqueues(void)
974{ 1019{
975 cpu_populated_map = cpu_online_map; 1020 cpu_populated_map = cpu_online_map;