aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2011-09-19 23:51:04 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2011-09-20 01:52:38 -0400
commit1cce058b29e7eb8a71a72d8bb87eb7b4e0401c22 (patch)
tree493477b7a19606c66de2f003bca83672f2164cad /kernel
parentc26afe9e8591f306d79aab8071f1d34e4f60b700 (diff)
parent9d037a777695993ec7437e5f451647dea7919d4c (diff)
Merge remote-tracking branch 'origin/master' into next
(Merge in order to get the PCIe mps/mrss code fixes)
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c67
-rw-r--r--kernel/irq/chip.c2
-rw-r--r--kernel/sched.c43
-rw-r--r--kernel/taskstats.c1
-rw-r--r--kernel/time/alarmtimer.c18
-rw-r--r--kernel/tsacct.c15
-rw-r--r--kernel/workqueue.c7
7 files changed, 111 insertions, 42 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b8785e26ee1c..0f857782d06f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -399,14 +399,54 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
399 local_irq_restore(flags); 399 local_irq_restore(flags);
400} 400}
401 401
402static inline void perf_cgroup_sched_out(struct task_struct *task) 402static inline void perf_cgroup_sched_out(struct task_struct *task,
403 struct task_struct *next)
403{ 404{
404 perf_cgroup_switch(task, PERF_CGROUP_SWOUT); 405 struct perf_cgroup *cgrp1;
406 struct perf_cgroup *cgrp2 = NULL;
407
408 /*
409 * we come here when we know perf_cgroup_events > 0
410 */
411 cgrp1 = perf_cgroup_from_task(task);
412
413 /*
414 * next is NULL when called from perf_event_enable_on_exec()
415 * that will systematically cause a cgroup_switch()
416 */
417 if (next)
418 cgrp2 = perf_cgroup_from_task(next);
419
420 /*
421 * only schedule out current cgroup events if we know
422 * that we are switching to a different cgroup. Otherwise,
423 * do no touch the cgroup events.
424 */
425 if (cgrp1 != cgrp2)
426 perf_cgroup_switch(task, PERF_CGROUP_SWOUT);
405} 427}
406 428
407static inline void perf_cgroup_sched_in(struct task_struct *task) 429static inline void perf_cgroup_sched_in(struct task_struct *prev,
430 struct task_struct *task)
408{ 431{
409 perf_cgroup_switch(task, PERF_CGROUP_SWIN); 432 struct perf_cgroup *cgrp1;
433 struct perf_cgroup *cgrp2 = NULL;
434
435 /*
436 * we come here when we know perf_cgroup_events > 0
437 */
438 cgrp1 = perf_cgroup_from_task(task);
439
440 /* prev can never be NULL */
441 cgrp2 = perf_cgroup_from_task(prev);
442
443 /*
444 * only need to schedule in cgroup events if we are changing
445 * cgroup during ctxsw. Cgroup events were not scheduled
446 * out of ctxsw out if that was not the case.
447 */
448 if (cgrp1 != cgrp2)
449 perf_cgroup_switch(task, PERF_CGROUP_SWIN);
410} 450}
411 451
412static inline int perf_cgroup_connect(int fd, struct perf_event *event, 452static inline int perf_cgroup_connect(int fd, struct perf_event *event,
@@ -518,11 +558,13 @@ static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
518{ 558{
519} 559}
520 560
521static inline void perf_cgroup_sched_out(struct task_struct *task) 561static inline void perf_cgroup_sched_out(struct task_struct *task,
562 struct task_struct *next)
522{ 563{
523} 564}
524 565
525static inline void perf_cgroup_sched_in(struct task_struct *task) 566static inline void perf_cgroup_sched_in(struct task_struct *prev,
567 struct task_struct *task)
526{ 568{
527} 569}
528 570
@@ -1988,7 +2030,7 @@ void __perf_event_task_sched_out(struct task_struct *task,
1988 * cgroup event are system-wide mode only 2030 * cgroup event are system-wide mode only
1989 */ 2031 */
1990 if (atomic_read(&__get_cpu_var(perf_cgroup_events))) 2032 if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
1991 perf_cgroup_sched_out(task); 2033 perf_cgroup_sched_out(task, next);
1992} 2034}
1993 2035
1994static void task_ctx_sched_out(struct perf_event_context *ctx) 2036static void task_ctx_sched_out(struct perf_event_context *ctx)
@@ -2153,7 +2195,8 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
2153 * accessing the event control register. If a NMI hits, then it will 2195 * accessing the event control register. If a NMI hits, then it will
2154 * keep the event running. 2196 * keep the event running.
2155 */ 2197 */
2156void __perf_event_task_sched_in(struct task_struct *task) 2198void __perf_event_task_sched_in(struct task_struct *prev,
2199 struct task_struct *task)
2157{ 2200{
2158 struct perf_event_context *ctx; 2201 struct perf_event_context *ctx;
2159 int ctxn; 2202 int ctxn;
@@ -2171,7 +2214,7 @@ void __perf_event_task_sched_in(struct task_struct *task)
2171 * cgroup event are system-wide mode only 2214 * cgroup event are system-wide mode only
2172 */ 2215 */
2173 if (atomic_read(&__get_cpu_var(perf_cgroup_events))) 2216 if (atomic_read(&__get_cpu_var(perf_cgroup_events)))
2174 perf_cgroup_sched_in(task); 2217 perf_cgroup_sched_in(prev, task);
2175} 2218}
2176 2219
2177static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) 2220static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -2427,7 +2470,7 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx)
2427 * ctxswin cgroup events which are already scheduled 2470 * ctxswin cgroup events which are already scheduled
2428 * in. 2471 * in.
2429 */ 2472 */
2430 perf_cgroup_sched_out(current); 2473 perf_cgroup_sched_out(current, NULL);
2431 2474
2432 raw_spin_lock(&ctx->lock); 2475 raw_spin_lock(&ctx->lock);
2433 task_ctx_sched_out(ctx); 2476 task_ctx_sched_out(ctx);
@@ -3353,8 +3396,8 @@ static int perf_event_index(struct perf_event *event)
3353} 3396}
3354 3397
3355static void calc_timer_values(struct perf_event *event, 3398static void calc_timer_values(struct perf_event *event,
3356 u64 *running, 3399 u64 *enabled,
3357 u64 *enabled) 3400 u64 *running)
3358{ 3401{
3359 u64 now, ctx_time; 3402 u64 now, ctx_time;
3360 3403
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d5a3009da71a..dc5114b4c16c 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -178,7 +178,7 @@ void irq_shutdown(struct irq_desc *desc)
178 desc->depth = 1; 178 desc->depth = 1;
179 if (desc->irq_data.chip->irq_shutdown) 179 if (desc->irq_data.chip->irq_shutdown)
180 desc->irq_data.chip->irq_shutdown(&desc->irq_data); 180 desc->irq_data.chip->irq_shutdown(&desc->irq_data);
181 if (desc->irq_data.chip->irq_disable) 181 else if (desc->irq_data.chip->irq_disable)
182 desc->irq_data.chip->irq_disable(&desc->irq_data); 182 desc->irq_data.chip->irq_disable(&desc->irq_data);
183 else 183 else
184 desc->irq_data.chip->irq_mask(&desc->irq_data); 184 desc->irq_data.chip->irq_mask(&desc->irq_data);
diff --git a/kernel/sched.c b/kernel/sched.c
index ccacdbdecf45..ec5f472bc5b9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3065,7 +3065,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
3065#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 3065#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
3066 local_irq_disable(); 3066 local_irq_disable();
3067#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ 3067#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
3068 perf_event_task_sched_in(current); 3068 perf_event_task_sched_in(prev, current);
3069#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 3069#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
3070 local_irq_enable(); 3070 local_irq_enable();
3071#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ 3071#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -4279,9 +4279,9 @@ pick_next_task(struct rq *rq)
4279} 4279}
4280 4280
4281/* 4281/*
4282 * schedule() is the main scheduler function. 4282 * __schedule() is the main scheduler function.
4283 */ 4283 */
4284asmlinkage void __sched schedule(void) 4284static void __sched __schedule(void)
4285{ 4285{
4286 struct task_struct *prev, *next; 4286 struct task_struct *prev, *next;
4287 unsigned long *switch_count; 4287 unsigned long *switch_count;
@@ -4322,16 +4322,6 @@ need_resched:
4322 if (to_wakeup) 4322 if (to_wakeup)
4323 try_to_wake_up_local(to_wakeup); 4323 try_to_wake_up_local(to_wakeup);
4324 } 4324 }
4325
4326 /*
4327 * If we are going to sleep and we have plugged IO
4328 * queued, make sure to submit it to avoid deadlocks.
4329 */
4330 if (blk_needs_flush_plug(prev)) {
4331 raw_spin_unlock(&rq->lock);
4332 blk_schedule_flush_plug(prev);
4333 raw_spin_lock(&rq->lock);
4334 }
4335 } 4325 }
4336 switch_count = &prev->nvcsw; 4326 switch_count = &prev->nvcsw;
4337 } 4327 }
@@ -4369,6 +4359,26 @@ need_resched:
4369 if (need_resched()) 4359 if (need_resched())
4370 goto need_resched; 4360 goto need_resched;
4371} 4361}
4362
4363static inline void sched_submit_work(struct task_struct *tsk)
4364{
4365 if (!tsk->state)
4366 return;
4367 /*
4368 * If we are going to sleep and we have plugged IO queued,
4369 * make sure to submit it to avoid deadlocks.
4370 */
4371 if (blk_needs_flush_plug(tsk))
4372 blk_schedule_flush_plug(tsk);
4373}
4374
4375asmlinkage void schedule(void)
4376{
4377 struct task_struct *tsk = current;
4378
4379 sched_submit_work(tsk);
4380 __schedule();
4381}
4372EXPORT_SYMBOL(schedule); 4382EXPORT_SYMBOL(schedule);
4373 4383
4374#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 4384#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
@@ -4435,7 +4445,7 @@ asmlinkage void __sched notrace preempt_schedule(void)
4435 4445
4436 do { 4446 do {
4437 add_preempt_count_notrace(PREEMPT_ACTIVE); 4447 add_preempt_count_notrace(PREEMPT_ACTIVE);
4438 schedule(); 4448 __schedule();
4439 sub_preempt_count_notrace(PREEMPT_ACTIVE); 4449 sub_preempt_count_notrace(PREEMPT_ACTIVE);
4440 4450
4441 /* 4451 /*
@@ -4463,7 +4473,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
4463 do { 4473 do {
4464 add_preempt_count(PREEMPT_ACTIVE); 4474 add_preempt_count(PREEMPT_ACTIVE);
4465 local_irq_enable(); 4475 local_irq_enable();
4466 schedule(); 4476 __schedule();
4467 local_irq_disable(); 4477 local_irq_disable();
4468 sub_preempt_count(PREEMPT_ACTIVE); 4478 sub_preempt_count(PREEMPT_ACTIVE);
4469 4479
@@ -5588,7 +5598,7 @@ static inline int should_resched(void)
5588static void __cond_resched(void) 5598static void __cond_resched(void)
5589{ 5599{
5590 add_preempt_count(PREEMPT_ACTIVE); 5600 add_preempt_count(PREEMPT_ACTIVE);
5591 schedule(); 5601 __schedule();
5592 sub_preempt_count(PREEMPT_ACTIVE); 5602 sub_preempt_count(PREEMPT_ACTIVE);
5593} 5603}
5594 5604
@@ -7443,6 +7453,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
7443 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); 7453 struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
7444 if (sd && (sd->flags & SD_OVERLAP)) 7454 if (sd && (sd->flags & SD_OVERLAP))
7445 free_sched_groups(sd->groups, 0); 7455 free_sched_groups(sd->groups, 0);
7456 kfree(*per_cpu_ptr(sdd->sd, j));
7446 kfree(*per_cpu_ptr(sdd->sg, j)); 7457 kfree(*per_cpu_ptr(sdd->sg, j));
7447 kfree(*per_cpu_ptr(sdd->sgp, j)); 7458 kfree(*per_cpu_ptr(sdd->sgp, j));
7448 } 7459 }
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index e19ce1454ee1..e66046456f4f 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -655,6 +655,7 @@ static struct genl_ops taskstats_ops = {
655 .cmd = TASKSTATS_CMD_GET, 655 .cmd = TASKSTATS_CMD_GET,
656 .doit = taskstats_user_cmd, 656 .doit = taskstats_user_cmd,
657 .policy = taskstats_cmd_get_policy, 657 .policy = taskstats_cmd_get_policy,
658 .flags = GENL_ADMIN_PERM,
658}; 659};
659 660
660static struct genl_ops cgroupstats_ops = { 661static struct genl_ops cgroupstats_ops = {
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 59f369f98a04..ea5e1a928d5b 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer)
441static void alarm_timer_get(struct k_itimer *timr, 441static void alarm_timer_get(struct k_itimer *timr,
442 struct itimerspec *cur_setting) 442 struct itimerspec *cur_setting)
443{ 443{
444 memset(cur_setting, 0, sizeof(struct itimerspec));
445
444 cur_setting->it_interval = 446 cur_setting->it_interval =
445 ktime_to_timespec(timr->it.alarmtimer.period); 447 ktime_to_timespec(timr->it.alarmtimer.period);
446 cur_setting->it_value = 448 cur_setting->it_value =
@@ -479,11 +481,17 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
479 if (!rtcdev) 481 if (!rtcdev)
480 return -ENOTSUPP; 482 return -ENOTSUPP;
481 483
482 /* Save old values */ 484 /*
483 old_setting->it_interval = 485 * XXX HACK! Currently we can DOS a system if the interval
484 ktime_to_timespec(timr->it.alarmtimer.period); 486 * period on alarmtimers is too small. Cap the interval here
485 old_setting->it_value = 487 * to 100us and solve this properly in a future patch! -jstultz
486 ktime_to_timespec(timr->it.alarmtimer.node.expires); 488 */
489 if ((new_setting->it_interval.tv_sec == 0) &&
490 (new_setting->it_interval.tv_nsec < 100000))
491 new_setting->it_interval.tv_nsec = 100000;
492
493 if (old_setting)
494 alarm_timer_get(timr, old_setting);
487 495
488 /* If the timer was already set, cancel it */ 496 /* If the timer was already set, cancel it */
489 alarm_cancel(&timr->it.alarmtimer); 497 alarm_cancel(&timr->it.alarmtimer);
diff --git a/kernel/tsacct.c b/kernel/tsacct.c
index 24dc60d9fa1f..5bbfac85866e 100644
--- a/kernel/tsacct.c
+++ b/kernel/tsacct.c
@@ -78,6 +78,7 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk)
78 78
79#define KB 1024 79#define KB 1024
80#define MB (1024*KB) 80#define MB (1024*KB)
81#define KB_MASK (~(KB-1))
81/* 82/*
82 * fill in extended accounting fields 83 * fill in extended accounting fields
83 */ 84 */
@@ -95,14 +96,14 @@ void xacct_add_tsk(struct taskstats *stats, struct task_struct *p)
95 stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB; 96 stats->hiwater_vm = get_mm_hiwater_vm(mm) * PAGE_SIZE / KB;
96 mmput(mm); 97 mmput(mm);
97 } 98 }
98 stats->read_char = p->ioac.rchar; 99 stats->read_char = p->ioac.rchar & KB_MASK;
99 stats->write_char = p->ioac.wchar; 100 stats->write_char = p->ioac.wchar & KB_MASK;
100 stats->read_syscalls = p->ioac.syscr; 101 stats->read_syscalls = p->ioac.syscr & KB_MASK;
101 stats->write_syscalls = p->ioac.syscw; 102 stats->write_syscalls = p->ioac.syscw & KB_MASK;
102#ifdef CONFIG_TASK_IO_ACCOUNTING 103#ifdef CONFIG_TASK_IO_ACCOUNTING
103 stats->read_bytes = p->ioac.read_bytes; 104 stats->read_bytes = p->ioac.read_bytes & KB_MASK;
104 stats->write_bytes = p->ioac.write_bytes; 105 stats->write_bytes = p->ioac.write_bytes & KB_MASK;
105 stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes; 106 stats->cancelled_write_bytes = p->ioac.cancelled_write_bytes & KB_MASK;
106#else 107#else
107 stats->read_bytes = 0; 108 stats->read_bytes = 0;
108 stats->write_bytes = 0; 109 stats->write_bytes = 0;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 25fb1b0e53fa..1783aabc6128 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2412,8 +2412,13 @@ reflush:
2412 2412
2413 for_each_cwq_cpu(cpu, wq) { 2413 for_each_cwq_cpu(cpu, wq) {
2414 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); 2414 struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
2415 bool drained;
2415 2416
2416 if (!cwq->nr_active && list_empty(&cwq->delayed_works)) 2417 spin_lock_irq(&cwq->gcwq->lock);
2418 drained = !cwq->nr_active && list_empty(&cwq->delayed_works);
2419 spin_unlock_irq(&cwq->gcwq->lock);
2420
2421 if (drained)
2417 continue; 2422 continue;
2418 2423
2419 if (++flush_cnt == 10 || 2424 if (++flush_cnt == 10 ||