Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c            17
-rw-r--r--  kernel/events/core.c        1
-rw-r--r--  kernel/irq/chip.c           8
-rw-r--r--  kernel/irq/internals.h      3
-rw-r--r--  kernel/irq/manage.c        39
-rw-r--r--  kernel/irq/migration.c     13
-rw-r--r--  kernel/sched/core.c        68
-rw-r--r--  kernel/sched/fair.c        42
-rw-r--r--  kernel/sched/rt.c          51
-rw-r--r--  kernel/smpboot.c           17
-rw-r--r--  kernel/sys.c               60
-rw-r--r--  kernel/time/clockevents.c   3
-rw-r--r--  kernel/time/tick-sched.c   19
-rw-r--r--  kernel/time/timekeeping.c   2
14 files changed, 227 insertions(+), 116 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0f3527d6184a..72fcd3069a90 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -896,10 +896,13 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
         mutex_unlock(&cgroup_mutex);
 
         /*
-         * Drop the active superblock reference that we took when we
-         * created the cgroup
+         * We want to drop the active superblock reference from the
+         * cgroup creation after all the dentry refs are gone -
+         * kill_sb gets mighty unhappy otherwise. Mark
+         * dentry->d_fsdata with cgroup_diput() to tell
+         * cgroup_d_release() to call deactivate_super().
          */
-        deactivate_super(cgrp->root->sb);
+        dentry->d_fsdata = cgroup_diput;
 
         /*
          * if we're getting rid of the cgroup, refcount should ensure
@@ -925,6 +928,13 @@ static int cgroup_delete(const struct dentry *d)
         return 1;
 }
 
+static void cgroup_d_release(struct dentry *dentry)
+{
+        /* did cgroup_diput() tell me to deactivate super? */
+        if (dentry->d_fsdata == cgroup_diput)
+                deactivate_super(dentry->d_sb);
+}
+
 static void remove_dir(struct dentry *d)
 {
         struct dentry *parent = dget(d->d_parent);
@@ -1532,6 +1542,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
         static const struct dentry_operations cgroup_dops = {
                 .d_iput = cgroup_diput,
                 .d_delete = cgroup_delete,
+                .d_release = cgroup_d_release,
         };
 
         struct inode *inode =
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5b06cbbf6931..f85c0154b333 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3181,7 +3181,6 @@ static void perf_event_for_each(struct perf_event *event,
         event = event->group_leader;
 
         perf_event_for_each_child(event, func);
-        func(event);
         list_for_each_entry(sibling, &event->sibling_list, group_entry)
                 perf_event_for_each_child(sibling, func);
         mutex_unlock(&ctx->mutex);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index fc275e4f629b..eebd6d5cfb44 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -275,8 +275,10 @@ void handle_nested_irq(unsigned int irq)
         kstat_incr_irqs_this_cpu(irq, desc);
 
         action = desc->action;
-        if (unlikely(!action || irqd_irq_disabled(&desc->irq_data)))
+        if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) {
+                desc->istate |= IRQS_PENDING;
                 goto out_unlock;
+        }
 
         irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
         raw_spin_unlock_irq(&desc->lock);
@@ -324,8 +326,10 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
         desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
         kstat_incr_irqs_this_cpu(irq, desc);
 
-        if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data)))
+        if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
+                desc->istate |= IRQS_PENDING;
                 goto out_unlock;
+        }
 
         handle_irq_event(desc);
 
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 8e5c56b3b7d9..001fa5bab490 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -101,6 +101,9 @@ extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask);
 
 extern void irq_set_thread_affinity(struct irq_desc *desc);
 
+extern int irq_do_set_affinity(struct irq_data *data,
+                               const struct cpumask *dest, bool force);
+
 /* Inline functions for support of irq chips on slow busses */
 static inline void chip_bus_lock(struct irq_desc *desc)
 {
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ea0c6c2ae6f7..8c548232ba39 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -142,6 +142,25 @@ static inline void
 irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
 #endif
 
+int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
+                        bool force)
+{
+        struct irq_desc *desc = irq_data_to_desc(data);
+        struct irq_chip *chip = irq_data_get_irq_chip(data);
+        int ret;
+
+        ret = chip->irq_set_affinity(data, mask, false);
+        switch (ret) {
+        case IRQ_SET_MASK_OK:
+                cpumask_copy(data->affinity, mask);
+        case IRQ_SET_MASK_OK_NOCOPY:
+                irq_set_thread_affinity(desc);
+                ret = 0;
+        }
+
+        return ret;
+}
+
 int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
 {
         struct irq_chip *chip = irq_data_get_irq_chip(data);
@@ -152,14 +171,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
                 return -EINVAL;
 
         if (irq_can_move_pcntxt(data)) {
-                ret = chip->irq_set_affinity(data, mask, false);
-                switch (ret) {
-                case IRQ_SET_MASK_OK:
-                        cpumask_copy(data->affinity, mask);
-                case IRQ_SET_MASK_OK_NOCOPY:
-                        irq_set_thread_affinity(desc);
-                        ret = 0;
-                }
+                ret = irq_do_set_affinity(data, mask, false);
         } else {
                 irqd_set_move_pending(data);
                 irq_copy_pending(desc, mask);
@@ -283,9 +295,8 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier);
 static int
 setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
 {
-        struct irq_chip *chip = irq_desc_get_chip(desc);
         struct cpumask *set = irq_default_affinity;
-        int ret, node = desc->irq_data.node;
+        int node = desc->irq_data.node;
 
         /* Excludes PER_CPU and NO_BALANCE interrupts */
         if (!irq_can_set_affinity(irq))
@@ -311,13 +322,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
                 if (cpumask_intersects(mask, nodemask))
                         cpumask_and(mask, mask, nodemask);
         }
-        ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
-        switch (ret) {
-        case IRQ_SET_MASK_OK:
-                cpumask_copy(desc->irq_data.affinity, mask);
-        case IRQ_SET_MASK_OK_NOCOPY:
-                irq_set_thread_affinity(desc);
-        }
+        irq_do_set_affinity(&desc->irq_data, mask, false);
         return 0;
 }
 #else
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index c3c89751b327..ca3f4aaff707 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -42,17 +42,8 @@ void irq_move_masked_irq(struct irq_data *idata)
          * For correct operation this depends on the caller
          * masking the irqs.
          */
-        if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
-                   < nr_cpu_ids)) {
-                int ret = chip->irq_set_affinity(&desc->irq_data,
-                                                 desc->pending_mask, false);
-                switch (ret) {
-                case IRQ_SET_MASK_OK:
-                        cpumask_copy(desc->irq_data.affinity, desc->pending_mask);
-                case IRQ_SET_MASK_OK_NOCOPY:
-                        irq_set_thread_affinity(desc);
-                }
-        }
+        if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids)
+                irq_do_set_affinity(&desc->irq_data, desc->pending_mask, false);
 
         cpumask_clear(desc->pending_mask);
 }
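Note on the genirq hunks above: the open-coded handling of the chip callback's return value is now funnelled through the new irq_do_set_affinity() helper. Below is a rough, userspace-only model of that return-value contract, included purely as an illustration (the enum and printf stand-ins are made up for this sketch, not kernel code): IRQ_SET_MASK_OK means the core still copies the mask into data->affinity, IRQ_SET_MASK_OK_NOCOPY means the chip already wrote the effective mask itself, and both cases deliberately fall through to the irq-thread-affinity update.

#include <stdio.h>

/* illustrative stand-ins for the kernel's return codes */
enum { IRQ_SET_MASK_OK, IRQ_SET_MASK_OK_NOCOPY };

static void handle_chip_ret(int ret)
{
        switch (ret) {
        case IRQ_SET_MASK_OK:
                printf("core copies the mask into data->affinity\n");
                /* deliberate fall-through, as in irq_do_set_affinity() */
        case IRQ_SET_MASK_OK_NOCOPY:
                printf("core refreshes the irq thread affinity, returns 0\n");
                break;
        default:
                printf("error %d is passed back to the caller\n", ret);
        }
}

int main(void)
{
        handle_chip_ret(IRQ_SET_MASK_OK);
        handle_chip_ret(IRQ_SET_MASK_OK_NOCOPY);
        handle_chip_ret(-22);        /* e.g. an -EINVAL from the chip */
        return 0;
}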
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 39eb6011bc38..c46958e26121 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -142,9 +142,8 @@ const_debug unsigned int sysctl_sched_features =
 #define SCHED_FEAT(name, enabled)        \
         #name ,
 
-static __read_mostly char *sched_feat_names[] = {
+static const char * const sched_feat_names[] = {
 #include "features.h"
-        NULL
 };
 
 #undef SCHED_FEAT
@@ -2517,25 +2516,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
         sched_avg_update(this_rq);
 }
 
+#ifdef CONFIG_NO_HZ
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we cannot use the delta approach from the regular tick since that
+ * would seriously skew the load calculation. However we'll make do for those
+ * updates happening while idle (nohz_idle_balance) or coming out of idle
+ * (tick_nohz_idle_exit).
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
 /*
  * Called from nohz_idle_balance() to update the load ratings before doing the
  * idle balance.
  */
 void update_idle_cpu_load(struct rq *this_rq)
 {
-        unsigned long curr_jiffies = jiffies;
+        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
         unsigned long load = this_rq->load.weight;
         unsigned long pending_updates;
 
         /*
-         * Bloody broken means of dealing with nohz, but better than nothing..
-         * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
-         * update and see 0 difference the one time and 2 the next, even though
-         * we ticked at roughtly the same rate.
-         *
-         * Hence we only use this from nohz_idle_balance() and skip this
-         * nonsense when called from the scheduler_tick() since that's
-         * guaranteed a stable rate.
+         * bail if there's load or we're actually up-to-date.
          */
         if (load || curr_jiffies == this_rq->last_load_update_tick)
                 return;
@@ -2547,12 +2553,38 @@ void update_idle_cpu_load(struct rq *this_rq)
 }
 
 /*
+ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ */
+void update_cpu_load_nohz(void)
+{
+        struct rq *this_rq = this_rq();
+        unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+        unsigned long pending_updates;
+
+        if (curr_jiffies == this_rq->last_load_update_tick)
+                return;
+
+        raw_spin_lock(&this_rq->lock);
+        pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+        if (pending_updates) {
+                this_rq->last_load_update_tick = curr_jiffies;
+                /*
+                 * We were idle, this means load 0, the current load might be
+                 * !0 due to remote wakeups and the sort.
+                 */
+                __update_cpu_load(this_rq, 0, pending_updates);
+        }
+        raw_spin_unlock(&this_rq->lock);
+}
+#endif /* CONFIG_NO_HZ */
+
+/*
  * Called from scheduler_tick()
  */
 static void update_cpu_load_active(struct rq *this_rq)
 {
         /*
-         * See the mess in update_idle_cpu_load().
+         * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
          */
         this_rq->last_load_update_tick = jiffies;
         __update_cpu_load(this_rq, this_rq->load.weight, 1);
@@ -4982,7 +5014,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
                 p->sched_class->set_cpus_allowed(p, new_mask);
 
         cpumask_copy(&p->cpus_allowed, new_mask);
-        p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
+        p->nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
 /*
@@ -5997,11 +6029,14 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 
                 cpumask_or(covered, covered, sg_span);
 
-                sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
+                sg->sgp = *per_cpu_ptr(sdd->sgp, i);
                 atomic_inc(&sg->sgp->ref);
 
-                if (cpumask_test_cpu(cpu, sg_span))
+                if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
+                    cpumask_first(sg_span) == cpu) {
+                        WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
                         groups = sg;
+                }
 
                 if (!first)
                         first = sg;
@@ -6403,7 +6438,7 @@ static void sched_init_numa(void)
                 return;
 
         for (j = 0; j < nr_node_ids; j++) {
-                struct cpumask *mask = kzalloc_node(cpumask_size(), GFP_KERNEL, j);
+                struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
                 if (!mask)
                         return;
 
@@ -6691,7 +6726,6 @@ static int init_sched_domains(const struct cpumask *cpu_map)
         if (!doms_cur)
                 doms_cur = &fallback_doms;
         cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
-        dattr_cur = NULL;
         err = build_sched_domains(doms_cur[0], NULL);
         register_sched_domain_sysctl();
 
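The new comment block added above describes why per-cpu jiffies deltas cannot be trusted under nohz: a cpu reading jiffies at roughly the tick rate, but with its phase straddling the updating cpu's boundary, observes deltas of {0,2} instead of {1,1}. A rough userspace model of that effect follows, included only as an illustration; the 0.05 phase jitter and six samples are arbitrary choices for the sketch.

#include <stdio.h>

int main(void)
{
        unsigned long prev = 0;
        int i;

        for (i = 1; i <= 6; i++) {
                /* samples land just after, then just before, each jiffies update */
                double now = i + ((i & 1) ? 0.05 : -0.05);
                unsigned long jiffies = (unsigned long)now;   /* truncate like a read */

                printf("sample %d at t=%.2f: jiffies=%lu delta=%lu\n",
                       i, now, jiffies, jiffies - prev);
                prev = jiffies;
        }
        return 0;
}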
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 940e6d17cf96..b2a2d236f27b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2703,7 +2703,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
         int want_sd = 1;
         int sync = wake_flags & WF_SYNC;
 
-        if (p->rt.nr_cpus_allowed == 1)
+        if (p->nr_cpus_allowed == 1)
                 return prev_cpu;
 
         if (sd_flag & SD_BALANCE_WAKE) {
@@ -3503,15 +3503,22 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
 unsigned long scale_rt_power(int cpu)
 {
         struct rq *rq = cpu_rq(cpu);
-        u64 total, available;
+        u64 total, available, age_stamp, avg;
 
-        total = sched_avg_period() + (rq->clock - rq->age_stamp);
+        /*
+         * Since we're reading these variables without serialization make sure
+         * we read them once before doing sanity checks on them.
+         */
+        age_stamp = ACCESS_ONCE(rq->age_stamp);
+        avg = ACCESS_ONCE(rq->rt_avg);
+
+        total = sched_avg_period() + (rq->clock - age_stamp);
 
-        if (unlikely(total < rq->rt_avg)) {
+        if (unlikely(total < avg)) {
                 /* Ensures that power won't end up being negative */
                 available = 0;
         } else {
-                available = total - rq->rt_avg;
+                available = total - avg;
         }
 
         if (unlikely((s64)total < SCHED_POWER_SCALE))
@@ -3574,11 +3581,26 @@ void update_group_power(struct sched_domain *sd, int cpu)
 
         power = 0;
 
-        group = child->groups;
-        do {
-                power += group->sgp->power;
-                group = group->next;
-        } while (group != child->groups);
+        if (child->flags & SD_OVERLAP) {
+                /*
+                 * SD_OVERLAP domains cannot assume that child groups
+                 * span the current group.
+                 */
+
+                for_each_cpu(cpu, sched_group_cpus(sdg))
+                        power += power_of(cpu);
+        } else {
+                /*
+                 * !SD_OVERLAP domains can assume that child groups
+                 * span the current group.
+                 */
+
+                group = child->groups;
+                do {
+                        power += group->sgp->power;
+                        group = group->next;
+                } while (group != child->groups);
+        }
 
         sdg->sgp->power = power;
 }
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c5565c3c515f..2a4e8dffbd6b 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -274,13 +274,16 @@ static void update_rt_migration(struct rt_rq *rt_rq)
 
 static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+        struct task_struct *p;
+
         if (!rt_entity_is_task(rt_se))
                 return;
 
+        p = rt_task_of(rt_se);
         rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 
         rt_rq->rt_nr_total++;
-        if (rt_se->nr_cpus_allowed > 1)
+        if (p->nr_cpus_allowed > 1)
                 rt_rq->rt_nr_migratory++;
 
         update_rt_migration(rt_rq);
@@ -288,13 +291,16 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+        struct task_struct *p;
+
         if (!rt_entity_is_task(rt_se))
                 return;
 
+        p = rt_task_of(rt_se);
         rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 
         rt_rq->rt_nr_total--;
-        if (rt_se->nr_cpus_allowed > 1)
+        if (p->nr_cpus_allowed > 1)
                 rt_rq->rt_nr_migratory--;
 
         update_rt_migration(rt_rq);
@@ -1161,7 +1167,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
         enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
 
-        if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
+        if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
                 enqueue_pushable_task(rq, p);
 
         inc_nr_running(rq);
@@ -1225,7 +1231,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 
         cpu = task_cpu(p);
 
-        if (p->rt.nr_cpus_allowed == 1)
+        if (p->nr_cpus_allowed == 1)
                 goto out;
 
         /* For anything but wake ups, just return the task_cpu */
@@ -1260,9 +1266,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
          * will have to sort it out.
          */
         if (curr && unlikely(rt_task(curr)) &&
-            (curr->rt.nr_cpus_allowed < 2 ||
+            (curr->nr_cpus_allowed < 2 ||
              curr->prio <= p->prio) &&
-            (p->rt.nr_cpus_allowed > 1)) {
+            (p->nr_cpus_allowed > 1)) {
                 int target = find_lowest_rq(p);
 
                 if (target != -1)
@@ -1276,10 +1282,10 @@ out:
 
 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 {
-        if (rq->curr->rt.nr_cpus_allowed == 1)
+        if (rq->curr->nr_cpus_allowed == 1)
                 return;
 
-        if (p->rt.nr_cpus_allowed != 1
+        if (p->nr_cpus_allowed != 1
             && cpupri_find(&rq->rd->cpupri, p, NULL))
                 return;
 
@@ -1395,7 +1401,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
          * The previous task needs to be made eligible for pushing
          * if it is still active
          */
-        if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1)
+        if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
                 enqueue_pushable_task(rq, p);
 }
 
@@ -1408,7 +1414,7 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
         if (!task_running(rq, p) &&
             (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
-            (p->rt.nr_cpus_allowed > 1))
+            (p->nr_cpus_allowed > 1))
                 return 1;
         return 0;
 }
@@ -1464,7 +1470,7 @@ static int find_lowest_rq(struct task_struct *task)
         if (unlikely(!lowest_mask))
                 return -1;
 
-        if (task->rt.nr_cpus_allowed == 1)
+        if (task->nr_cpus_allowed == 1)
                 return -1; /* No other targets possible */
 
         if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
@@ -1586,7 +1592,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
 
         BUG_ON(rq->cpu != task_cpu(p));
         BUG_ON(task_current(rq, p));
-        BUG_ON(p->rt.nr_cpus_allowed <= 1);
+        BUG_ON(p->nr_cpus_allowed <= 1);
 
         BUG_ON(!p->on_rq);
         BUG_ON(!rt_task(p));
@@ -1793,9 +1799,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
         if (!task_running(rq, p) &&
             !test_tsk_need_resched(rq->curr) &&
             has_pushable_tasks(rq) &&
-            p->rt.nr_cpus_allowed > 1 &&
+            p->nr_cpus_allowed > 1 &&
             rt_task(rq->curr) &&
-            (rq->curr->rt.nr_cpus_allowed < 2 ||
+            (rq->curr->nr_cpus_allowed < 2 ||
              rq->curr->prio <= p->prio))
                 push_rt_tasks(rq);
 }
@@ -1817,7 +1823,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
          * Only update if the process changes its state from whether it
          * can migrate or not.
          */
-        if ((p->rt.nr_cpus_allowed > 1) == (weight > 1))
+        if ((p->nr_cpus_allowed > 1) == (weight > 1))
                 return;
 
         rq = task_rq(p);
@@ -1979,6 +1985,8 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 {
+        struct sched_rt_entity *rt_se = &p->rt;
+
         update_curr_rt(rq);
 
         watchdog(rq, p);
@@ -1996,12 +2004,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
                 p->rt.time_slice = RR_TIMESLICE;
 
         /*
-         * Requeue to the end of queue if we are not the only element
-         * on the queue:
+         * Requeue to the end of queue if we (and all of our ancestors) are the
+         * only element on the queue
          */
-        if (p->rt.run_list.prev != p->rt.run_list.next) {
-                requeue_task_rt(rq, p, 0);
-                set_tsk_need_resched(p);
+        for_each_sched_rt_entity(rt_se) {
+                if (rt_se->run_list.prev != rt_se->run_list.next) {
+                        requeue_task_rt(rq, p, 0);
+                        set_tsk_need_resched(p);
+                        return;
+                }
         }
 }
 
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index e1a797e028a3..98f60c5caa1b 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -31,6 +31,12 @@ void __init idle_thread_set_boot_cpu(void)
         per_cpu(idle_threads, smp_processor_id()) = current;
 }
 
+/**
+ * idle_init - Initialize the idle thread for a cpu
+ * @cpu:        The cpu for which the idle thread should be initialized
+ *
+ * Creates the thread if it does not exist.
+ */
 static inline void idle_init(unsigned int cpu)
 {
         struct task_struct *tsk = per_cpu(idle_threads, cpu);
@@ -45,17 +51,16 @@ static inline void idle_init(unsigned int cpu)
 }
 
 /**
- * idle_thread_init - Initialize the idle thread for a cpu
- * @cpu:        The cpu for which the idle thread should be initialized
- *
- * Creates the thread if it does not exist.
+ * idle_threads_init - Initialize idle threads for all cpus
  */
 void __init idle_threads_init(void)
 {
-        unsigned int cpu;
+        unsigned int cpu, boot_cpu;
+
+        boot_cpu = smp_processor_id();
 
         for_each_possible_cpu(cpu) {
-                if (cpu != smp_processor_id())
+                if (cpu != boot_cpu)
                         idle_init(cpu);
         }
 }
diff --git a/kernel/sys.c b/kernel/sys.c
index 9ff89cb9657a..f0ec44dcd415 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1786,27 +1786,13 @@ SYSCALL_DEFINE1(umask, int, mask)
 }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
-static bool vma_flags_mismatch(struct vm_area_struct *vma,
-                               unsigned long required,
-                               unsigned long banned)
-{
-        return (vma->vm_flags & required) != required ||
-               (vma->vm_flags & banned);
-}
-
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
+        struct vm_area_struct *vma;
         struct file *exe_file;
         struct dentry *dentry;
         int err;
 
-        /*
-         * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
-         * remain. So perform a quick test first.
-         */
-        if (mm->num_exe_file_vmas)
-                return -EBUSY;
-
         exe_file = fget(fd);
         if (!exe_file)
                 return -EBADF;
@@ -1827,17 +1813,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
         if (err)
                 goto exit;
 
+        down_write(&mm->mmap_sem);
+
+        /*
+         * Forbid mm->exe_file change if there are mapped other files.
+         */
+        err = -EBUSY;
+        for (vma = mm->mmap; vma; vma = vma->vm_next) {
+                if (vma->vm_file && !path_equal(&vma->vm_file->f_path,
+                                                &exe_file->f_path))
+                        goto exit_unlock;
+        }
+
         /*
          * The symlink can be changed only once, just to disallow arbitrary
          * transitions malicious software might bring in. This means one
          * could make a snapshot over all processes running and monitor
          * /proc/pid/exe changes to notice unusual activity if needed.
          */
-        down_write(&mm->mmap_sem);
-        if (likely(!mm->exe_file))
-                set_mm_exe_file(mm, exe_file);
-        else
-                err = -EBUSY;
+        err = -EPERM;
+        if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
+                goto exit_unlock;
+
+        set_mm_exe_file(mm, exe_file);
+exit_unlock:
         up_write(&mm->mmap_sem);
 
 exit:
@@ -1862,7 +1861,7 @@ static int prctl_set_mm(int opt, unsigned long addr,
         if (opt == PR_SET_MM_EXE_FILE)
                 return prctl_set_mm_exe_file(mm, (unsigned int)addr);
 
-        if (addr >= TASK_SIZE)
+        if (addr >= TASK_SIZE || addr < mmap_min_addr)
                 return -EINVAL;
 
         error = -EINVAL;
@@ -1924,12 +1923,6 @@ static int prctl_set_mm(int opt, unsigned long addr,
                         error = -EFAULT;
                         goto out;
                 }
-#ifdef CONFIG_STACK_GROWSUP
-                if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
-#else
-                if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
-#endif
-                        goto out;
                 if (opt == PR_SET_MM_START_STACK)
                         mm->start_stack = addr;
                 else if (opt == PR_SET_MM_ARG_START)
@@ -1981,12 +1974,22 @@ out:
         up_read(&mm->mmap_sem);
         return error;
 }
+
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+        return put_user(me->clear_child_tid, tid_addr);
+}
+
 #else /* CONFIG_CHECKPOINT_RESTORE */
 static int prctl_set_mm(int opt, unsigned long addr,
                         unsigned long arg4, unsigned long arg5)
 {
         return -EINVAL;
 }
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+        return -EINVAL;
+}
 #endif
 
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
@@ -2124,6 +2127,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
                 else
                         return -EINVAL;
                 break;
+        case PR_GET_TID_ADDRESS:
+                error = prctl_get_tid_address(me, (int __user **)arg2);
+                break;
         default:
                 return -EINVAL;
         }
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 9cd928f7a7c6..7e1ce012a851 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -297,8 +297,7 @@ void clockevents_register_device(struct clock_event_device *dev)
 }
 EXPORT_SYMBOL_GPL(clockevents_register_device);
 
-static void clockevents_config(struct clock_event_device *dev,
-                               u32 freq)
+void clockevents_config(struct clock_event_device *dev, u32 freq)
 {
         u64 sec;
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6a3a5b9ff561..da70c6db496c 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -576,6 +576,7 @@ void tick_nohz_idle_exit(void)
         /* Update jiffies first */
         select_nohz_load_balancer(0);
         tick_do_update_jiffies64(now);
+        update_cpu_load_nohz();
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
         /*
@@ -814,6 +815,16 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
         return HRTIMER_RESTART;
 }
 
+static int sched_skew_tick;
+
+static int __init skew_tick(char *str)
+{
+        get_option(&str, &sched_skew_tick);
+
+        return 0;
+}
+early_param("skew_tick", skew_tick);
+
 /**
  * tick_setup_sched_timer - setup the tick emulation timer
  */
@@ -831,6 +842,14 @@ void tick_setup_sched_timer(void)
         /* Get the next period (per cpu) */
         hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
 
+        /* Offset the tick to avert xtime_lock contention. */
+        if (sched_skew_tick) {
+                u64 offset = ktime_to_ns(tick_period) >> 1;
+                do_div(offset, num_possible_cpus());
+                offset *= smp_processor_id();
+                hrtimer_add_expires_ns(&ts->sched_timer, offset);
+        }
+
         for (;;) {
                 hrtimer_forward(&ts->sched_timer, now, tick_period);
                 hrtimer_start_expires(&ts->sched_timer,
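For a feel of the skew_tick= offset computed in tick_setup_sched_timer() above, here is a small userspace model of the arithmetic; HZ=1000 (a 1,000,000 ns tick_period) and four possible cpus are assumptions made for this example, not values taken from the patch. Each cpu's first expiry is pushed back by cpu_id * (tick_period / 2) / num_possible_cpus(), so cpu0 through cpu3 come out at 0, 125000, 250000 and 375000 ns.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        const uint64_t tick_period_ns = 1000000;        /* assumes HZ=1000 */
        const unsigned int possible_cpus = 4;           /* assumed for the example */
        unsigned int cpu;

        for (cpu = 0; cpu < possible_cpus; cpu++) {
                uint64_t offset = tick_period_ns >> 1;  /* half a tick period */

                offset /= possible_cpus;                /* do_div() equivalent */
                offset *= cpu;                          /* per-cpu skew */
                printf("cpu%u: first tick skewed by %llu ns\n",
                       cpu, (unsigned long long)offset);
        }
        return 0;
}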
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 6e46cacf5969..6f46a00a1e8a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -962,6 +962,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift)
                 timekeeper.xtime.tv_sec++;
                 leap = second_overflow(timekeeper.xtime.tv_sec);
                 timekeeper.xtime.tv_sec += leap;
+                timekeeper.wall_to_monotonic.tv_sec -= leap;
         }
 
         /* Accumulate raw time */
@@ -1077,6 +1078,7 @@ static void update_wall_time(void)
                 timekeeper.xtime.tv_sec++;
                 leap = second_overflow(timekeeper.xtime.tv_sec);
                 timekeeper.xtime.tv_sec += leap;
+                timekeeper.wall_to_monotonic.tv_sec -= leap;
         }
 
         timekeeping_update(false);
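The two timekeeping hunks keep the monotonic clock continuous across a leap second: monotonic time is derived from xtime plus wall_to_monotonic, so adding the leap to xtime.tv_sec while subtracting it from wall_to_monotonic.tv_sec leaves the sum unchanged. A tiny userspace sketch of that invariant follows; the sample values are made up for illustration and are not taken from the patch.

#include <assert.h>

int main(void)
{
        long xtime_sec = 1341100799;            /* example wall time near a leap second */
        long wall_to_mono_sec = -1341000000;    /* made-up offset for the sketch */
        long mono_before = xtime_sec + wall_to_mono_sec;
        int leap = 1;                           /* second_overflow() reported +1s */

        xtime_sec += leap;                      /* what the code already did */
        wall_to_mono_sec -= leap;               /* what the added lines do */

        /* the derived monotonic value is unchanged by the leap second */
        assert(xtime_sec + wall_to_mono_sec == mono_before);
        return 0;
}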