author	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-19 21:19:48 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-02-19 21:19:48 -0500
commit	d652e1eb8e7b739fccbfb503a3da3e9f640fbf3d (patch)
tree	55ab77bad0cbb045eac0b84b80d63f88f1ae09e6 /kernel/sched
parent	8f55cea410dbc56114bb71a3742032070c8108d0 (diff)
parent	77852fea6e2442a0e654a9292060489895de18c7 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler changes from Ingo Molnar:
"Main changes:
- scheduler side full-dynticks (user-space execution is undisturbed
and receives no timer IRQs) preparation changes that convert the
cputime accounting code to be full-dynticks ready, from Frederic
Weisbecker.
- Initial sched.h split-up changes, by Clark Williams
- select_idle_sibling() performance improvement by Mike Galbraith:
" 1 tbench pair (worst case) in a 10 core + SMT package:
pre 15.22 MB/sec 1 procs
post 252.01 MB/sec 1 procs "
- sched_rr_get_interval() ABI fix/change. We think this detail is not
used by apps (so it's not an ABI in practice), but let's keep it
under observation.
- misc RT scheduling cleanups, optimizations"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
sched/rt: Add <linux/sched/rt.h> header to <linux/init_task.h>
cputime: Remove irqsave from seqlock readers
sched, powerpc: Fix sched.h split-up build failure
cputime: Restore CPU_ACCOUNTING config defaults for PPC64
sched/rt: Move rt specific bits into new header file
sched/rt: Add a tuning knob to allow changing SCHED_RR timeslice
sched: Move sched.h sysctl bits into separate header
sched: Fix signedness bug in yield_to()
sched: Fix select_idle_sibling() bouncing cow syndrome
sched/rt: Further simplify pick_rt_task()
sched/rt: Do not account zero delta_exec in update_curr_rt()
cputime: Safely read cputime of full dynticks CPUs
kvm: Prepare to add generic guest entry/exit callbacks
cputime: Use accessors to read task cputime stats
cputime: Allow dynamic switch between tick/virtual based cputime accounting
cputime: Generic on-demand virtual cputime accounting
cputime: Move default nsecs_to_cputime() to jiffies based cputime file
cputime: Librarize per nsecs resolution cputime definitions
cputime: Avoid multiplication overflow on utime scaling
context_tracking: Export context state for generic vtime
...
Fix up conflict in kernel/context_tracking.c due to comment additions.
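As background for the SCHED_RR timeslice knob and the sched_rr_get_interval() note in the pull message above, a minimal user-space probe is sketched below. It assumes the sched_rr_timeslice_ms sysctl that this series wires up to sched_rr_handler(); the procfs path is an assumption, not something shown on this page, and sched_rr_get_interval() is the standard POSIX call:

/* Sketch: observe the SCHED_RR quantum from user space.
 * Assumes /proc/sys/kernel/sched_rr_timeslice_ms (the sysctl this series
 * adds); adjust the path if your kernel exposes the knob differently. */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;
	FILE *f = fopen("/proc/sys/kernel/sched_rr_timeslice_ms", "r");

	if (f) {
		int ms;

		if (fscanf(f, "%d", &ms) == 1)
			printf("sched_rr_timeslice_ms: %d\n", ms);
		fclose(f);
	}

	/* For a SCHED_RR task this reports the round-robin timeslice;
	 * what it reports for other policies is the detail the pull
	 * message flags as a (theoretical) ABI change. */
	if (sched_rr_get_interval(0, &ts) == 0)
		printf("interval: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);

	return 0;
}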
Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/core.c	22
-rw-r--r--	kernel/sched/cpupri.c	2
-rw-r--r--	kernel/sched/cputime.c	314
-rw-r--r--	kernel/sched/fair.c	27
-rw-r--r--	kernel/sched/rt.c	26
-rw-r--r--	kernel/sched/sched.h	2
6 files changed, 322 insertions, 71 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 26058d0bebba..4a88f1d51563 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4371,7 +4371,7 @@ bool __sched yield_to(struct task_struct *p, bool preempt)
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
 	unsigned long flags;
-	bool yielded = 0;
+	int yielded = 0;
 
 	local_irq_save(flags);
 	rq = this_rq();
@@ -4667,6 +4667,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
+	vtime_init_idle(idle);
 #if defined(CONFIG_SMP)
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
@@ -7508,6 +7509,25 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	/* make sure that internally we keep jiffies */
+	/* also, writing zero resets timeslice to default */
+	if (!ret && write) {
+		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
+			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+	}
+	mutex_unlock(&mutex);
+	return ret;
+}
+
 int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
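The first hunk above changes yield_to()'s local 'yielded' from bool to int, the signedness fix listed in the shortlog. The point is plain C semantics: storing a negative error code such as -ESRCH in a bool collapses it to 1, so a caller that checks for a positive return can no longer tell success from failure. A stand-alone illustration (ordinary user-space C, not kernel code):

/* Why 'bool yielded' loses a negative error code. */
#include <stdbool.h>
#include <stdio.h>

#define ESRCH 3	/* same value as the kernel's ESRCH, used here for illustration */

int main(void)
{
	bool b = -ESRCH;	/* any non-zero value converts to true, i.e. 1 */
	int i = -ESRCH;

	printf("bool: %d  int: %d\n", b, i);		/* bool: 1  int: -3 */
	printf("b > 0: %d  i > 0: %d\n", b > 0, i > 0);	/* 1 vs 0 */
	return 0;
}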
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 23aa789c53ee..1095e878a46f 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -28,6 +28,8 @@
  */
 
 #include <linux/gfp.h>
+#include <linux/sched.h>
+#include <linux/sched/rt.h>
 #include "cpupri.h"
 
 /* Convert between a 140 based task->prio, and our 102 based cpupri */
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 293b202fcf79..9857329ed280 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -3,6 +3,7 @@
 #include <linux/tsacct_kern.h>
 #include <linux/kernel_stat.h>
 #include <linux/static_key.h>
+#include <linux/context_tracking.h>
 #include "sched.h"
 
 
@@ -163,7 +164,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);
 
 	/* Account for user time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }
 
 /*
@@ -213,7 +214,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
 	task_group_account_field(p, index, (__force u64) cputime);
 
 	/* Account for system time used */
-	acct_update_integrals(p);
+	acct_account_cputime(p);
 }
 
 /*
@@ -295,6 +296,7 @@ static __always_inline bool steal_account_process_tick(void)
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
 	struct signal_struct *sig = tsk->signal;
+	cputime_t utime, stime;
 	struct task_struct *t;
 
 	times->utime = sig->utime;
@@ -308,16 +310,15 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 
 	t = tsk;
 	do {
-		times->utime += t->utime;
-		times->stime += t->stime;
+		task_cputime(tsk, &utime, &stime);
+		times->utime += utime;
+		times->stime += stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
 	} while_each_thread(tsk, t);
 out:
 	rcu_read_unlock();
 }
 
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 /*
  * Account a tick to a process and cpustat
@@ -382,11 +383,12 @@ static void irqtime_account_idle_ticks(int ticks)
 		irqtime_account_process_tick(current, 0, rq);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
-static void irqtime_account_idle_ticks(int ticks) {}
-static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
+static inline void irqtime_account_idle_ticks(int ticks) {}
+static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						struct rq *rq) {}
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */
 
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 /*
  * Account a single tick of cpu time.
  * @p: the process that the cpu time gets accounted to
@@ -397,6 +399,9 @@ void account_process_tick(struct task_struct *p, int user_tick)
 	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
 	struct rq *rq = this_rq();
 
+	if (vtime_accounting_enabled())
+		return;
+
 	if (sched_clock_irqtime) {
 		irqtime_account_process_tick(p, user_tick, rq);
 		return;
@@ -438,8 +443,7 @@ void account_idle_ticks(unsigned long ticks)
 
 	account_idle_time(jiffies_to_cputime(ticks));
 }
-
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 /*
  * Use precise platform statistics if available:
@@ -461,25 +465,20 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	*st = cputime.stime;
 }
 
-void vtime_account_system_irqsafe(struct task_struct *tsk)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	vtime_account_system(tsk);
-	local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);
-
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 void vtime_task_switch(struct task_struct *prev)
 {
+	if (!vtime_accounting_enabled())
+		return;
+
 	if (is_idle_task(prev))
 		vtime_account_idle(prev);
 	else
 		vtime_account_system(prev);
 
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
 	vtime_account_user(prev);
+#endif
 	arch_vtime_task_switch(prev);
 }
 #endif
@@ -493,27 +492,40 @@ void vtime_task_switch(struct task_struct *prev)
  * vtime_account().
  */
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_account(struct task_struct *tsk)
+void vtime_account_irq_enter(struct task_struct *tsk)
 {
-	if (in_interrupt() || !is_idle_task(tsk))
-		vtime_account_system(tsk);
-	else
-		vtime_account_idle(tsk);
+	if (!vtime_accounting_enabled())
+		return;
+
+	if (!in_interrupt()) {
+		/*
+		 * If we interrupted user, context_tracking_in_user()
+		 * is 1 because the context tracking don't hook
+		 * on irq entry/exit. This way we know if
+		 * we need to flush user time on kernel entry.
+		 */
+		if (context_tracking_in_user()) {
+			vtime_account_user(tsk);
+			return;
+		}
+
+		if (is_idle_task(tsk)) {
+			vtime_account_idle(tsk);
+			return;
+		}
+	}
+	vtime_account_system(tsk);
 }
-EXPORT_SYMBOL_GPL(vtime_account);
+EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 
-#else
-
-#ifndef nsecs_to_cputime
-# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
-#endif
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
-static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total)
 {
 	u64 temp = (__force u64) rtime;
 
-	temp *= (__force u64) utime;
+	temp *= (__force u64) stime;
 
 	if (sizeof(cputime_t) == 4)
 		temp = div_u64(temp, (__force u32) total);
@@ -531,10 +543,10 @@ static void cputime_adjust(struct task_cputime *curr,
			   struct cputime *prev,
			   cputime_t *ut, cputime_t *st)
 {
-	cputime_t rtime, utime, total;
+	cputime_t rtime, stime, total;
 
-	utime = curr->utime;
-	total = utime + curr->stime;
+	stime = curr->stime;
+	total = stime + curr->utime;
 
 	/*
 	 * Tick based cputime accounting depend on random scheduling
@@ -549,17 +561,17 @@ static void cputime_adjust(struct task_cputime *curr,
 	rtime = nsecs_to_cputime(curr->sum_exec_runtime);
 
 	if (total)
-		utime = scale_utime(utime, rtime, total);
+		stime = scale_stime(stime, rtime, total);
 	else
-		utime = rtime;
+		stime = rtime;
 
 	/*
 	 * If the tick based count grows faster than the scheduler one,
 	 * the result of the scaling may go backward.
 	 * Let's enforce monotonicity.
 	 */
-	prev->utime = max(prev->utime, utime);
-	prev->stime = max(prev->stime, rtime - prev->utime);
+	prev->stime = max(prev->stime, stime);
+	prev->utime = max(prev->utime, rtime - prev->stime);
 
 	*ut = prev->utime;
 	*st = prev->stime;
@@ -568,11 +580,10 @@ static void cputime_adjust(struct task_cputime *curr,
 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct task_cputime cputime = {
-		.utime = p->utime,
-		.stime = p->stime,
 		.sum_exec_runtime = p->se.sum_exec_runtime,
 	};
 
+	task_cputime(p, &cputime.utime, &cputime.stime);
 	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
 }
 
@@ -586,4 +597,221 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
 	thread_group_cputime(p, &cputime);
 	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
 }
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static unsigned long long vtime_delta(struct task_struct *tsk)
+{
+	unsigned long long clock;
+
+	clock = sched_clock();
+	if (clock < tsk->vtime_snap)
+		return 0;
+
+	return clock - tsk->vtime_snap;
+}
+
+static cputime_t get_vtime_delta(struct task_struct *tsk)
+{
+	unsigned long long delta = vtime_delta(tsk);
+
+	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
+	tsk->vtime_snap += delta;
+
+	/* CHECKME: always safe to convert nsecs to cputime? */
+	return nsecs_to_cputime(delta);
+}
+
+static void __vtime_account_system(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta(tsk);
+
+	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
+}
+
+void vtime_account_system(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_irq_exit(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	if (context_tracking_in_user())
+		tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_user(struct task_struct *tsk)
+{
+	cputime_t delta_cpu;
+
+	if (!vtime_accounting_enabled())
+		return;
+
+	delta_cpu = get_vtime_delta(tsk);
+
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_SYS;
+	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_user_enter(struct task_struct *tsk)
+{
+	if (!vtime_accounting_enabled())
+		return;
+
+	write_seqlock(&tsk->vtime_seqlock);
+	tsk->vtime_snap_whence = VTIME_USER;
+	__vtime_account_system(tsk);
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_enter(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags |= PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_guest_exit(struct task_struct *tsk)
+{
+	write_seqlock(&tsk->vtime_seqlock);
+	__vtime_account_system(tsk);
+	current->flags &= ~PF_VCPU;
+	write_sequnlock(&tsk->vtime_seqlock);
+}
+
+void vtime_account_idle(struct task_struct *tsk)
+{
+	cputime_t delta_cpu = get_vtime_delta(tsk);
+
+	account_idle_time(delta_cpu);
+}
+
+bool vtime_accounting_enabled(void)
+{
+	return context_tracking_active();
+}
+
+void arch_vtime_task_switch(struct task_struct *prev)
+{
+	write_seqlock(&prev->vtime_seqlock);
+	prev->vtime_snap_whence = VTIME_SLEEPING;
+	write_sequnlock(&prev->vtime_seqlock);
+
+	write_seqlock(&current->vtime_seqlock);
+	current->vtime_snap_whence = VTIME_SYS;
+	current->vtime_snap = sched_clock();
+	write_sequnlock(&current->vtime_seqlock);
+}
+
+void vtime_init_idle(struct task_struct *t)
+{
+	unsigned long flags;
+
+	write_seqlock_irqsave(&t->vtime_seqlock, flags);
+	t->vtime_snap_whence = VTIME_SYS;
+	t->vtime_snap = sched_clock();
+	write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
+}
+
+cputime_t task_gtime(struct task_struct *t)
+{
+	unsigned int seq;
+	cputime_t gtime;
+
+	do {
+		seq = read_seqbegin(&t->vtime_seqlock);
+
+		gtime = t->gtime;
+		if (t->flags & PF_VCPU)
+			gtime += vtime_delta(t);
+
+	} while (read_seqretry(&t->vtime_seqlock, seq));
+
+	return gtime;
+}
+
+/*
+ * Fetch cputime raw values from fields of task_struct and
+ * add up the pending nohz execution time since the last
+ * cputime snapshot.
+ */
+static void
+fetch_task_cputime(struct task_struct *t,
+		   cputime_t *u_dst, cputime_t *s_dst,
+		   cputime_t *u_src, cputime_t *s_src,
+		   cputime_t *udelta, cputime_t *sdelta)
+{
+	unsigned int seq;
+	unsigned long long delta;
+
+	do {
+		*udelta = 0;
+		*sdelta = 0;
+
+		seq = read_seqbegin(&t->vtime_seqlock);
+
+		if (u_dst)
+			*u_dst = *u_src;
+		if (s_dst)
+			*s_dst = *s_src;
+
+		/* Task is sleeping, nothing to add */
+		if (t->vtime_snap_whence == VTIME_SLEEPING ||
+		    is_idle_task(t))
+			continue;
+
+		delta = vtime_delta(t);
+
+		/*
+		 * Task runs either in user or kernel space, add pending nohz time to
+		 * the right place.
+		 */
+		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
+			*udelta = delta;
+		} else {
+			if (t->vtime_snap_whence == VTIME_SYS)
+				*sdelta = delta;
+		}
+	} while (read_seqretry(&t->vtime_seqlock, seq));
+}
+
+
+void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utime, stime, &t->utime,
+			   &t->stime, &udelta, &sdelta);
+	if (utime)
+		*utime += udelta;
+	if (stime)
+		*stime += sdelta;
+}
+
+void task_cputime_scaled(struct task_struct *t,
+			 cputime_t *utimescaled, cputime_t *stimescaled)
+{
+	cputime_t udelta, sdelta;
+
+	fetch_task_cputime(t, utimescaled, stimescaled,
+			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
+	if (utimescaled)
+		*utimescaled += cputime_to_scaled(udelta);
+	if (stimescaled)
+		*stimescaled += cputime_to_scaled(sdelta);
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
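The cputime_adjust()/scale_stime() hunks above split the scheduler's precise runtime (rtime) between user and system time in proportion to the tick-based samples, then clamp the result so the values reported to user space never go backwards. A user-space sketch of that arithmetic on sample numbers (it mirrors the logic rather than reusing kernel code; the kernel additionally guards the multiplication against 32-bit overflow, per the "utime scaling" commit in the shortlog):

/* Sketch of the scale_stime()/cputime_adjust() arithmetic. */
#include <stdint.h>
#include <stdio.h>

#define max(a, b) ((a) > (b) ? (a) : (b))

/* stime scaled so that stime/total == result/rtime, as in scale_stime() */
static uint64_t scale_stime(uint64_t stime, uint64_t rtime, uint64_t total)
{
	return (rtime * stime) / total;
}

int main(void)
{
	/* tick-based samples (arbitrary units) and precise scheduler runtime */
	uint64_t utime = 30, stime = 10, rtime = 120;
	uint64_t prev_utime = 0, prev_stime = 0;
	uint64_t total = utime + stime;

	uint64_t s = total ? scale_stime(stime, rtime, total) : rtime; /* 120*10/40 = 30 */

	/* enforce monotonicity, as the second cputime_adjust() hunk does */
	prev_stime = max(prev_stime, s);
	prev_utime = max(prev_utime, rtime - prev_stime);

	printf("utime=%llu stime=%llu sum=%llu (== rtime)\n",
	       (unsigned long long)prev_utime,
	       (unsigned long long)prev_stime,
	       (unsigned long long)(prev_utime + prev_stime));
	return 0;
}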
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 81fa53643409..7a33e5986fc5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1680,9 +1680,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	}
 
 	/* ensure we never gain time by being placed backwards. */
-	vruntime = max_vruntime(se->vruntime, vruntime);
-
-	se->vruntime = vruntime;
+	se->vruntime = max_vruntime(se->vruntime, vruntime);
 }
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
@@ -3254,25 +3252,18 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
  */
 static int select_idle_sibling(struct task_struct *p, int target)
 {
-	int cpu = smp_processor_id();
-	int prev_cpu = task_cpu(p);
 	struct sched_domain *sd;
 	struct sched_group *sg;
-	int i;
+	int i = task_cpu(p);
 
-	/*
-	 * If the task is going to be woken-up on this cpu and if it is
-	 * already idle, then it is the right target.
-	 */
-	if (target == cpu && idle_cpu(cpu))
-		return cpu;
+	if (idle_cpu(target))
+		return target;
 
 	/*
-	 * If the task is going to be woken-up on the cpu where it previously
-	 * ran and if it is currently idle, then it the right target.
+	 * If the prevous cpu is cache affine and idle, don't be stupid.
 	 */
-	if (target == prev_cpu && idle_cpu(prev_cpu))
-		return prev_cpu;
+	if (i != target && cpus_share_cache(i, target) && idle_cpu(i))
+		return i;
 
 	/*
 	 * Otherwise, iterate the domains and find an elegible idle cpu.
@@ -3286,7 +3277,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
			goto next;
 
		for_each_cpu(i, sched_group_cpus(sg)) {
-			if (!idle_cpu(i))
+			if (i == target || !idle_cpu(i))
				goto next;
		}
 
@@ -6101,7 +6092,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task
 	 * idle runqueue:
 	 */
 	if (rq->cfs.load.weight)
-		rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
+		rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));
 
 	return rr_interval;
 }
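The select_idle_sibling() hunks above are the "bouncing cow" fix from the pull message: prefer the wake-up target CPU if it is already idle, then the task's previous CPU if it shares a cache with the target and is idle, and only then scan the domain for an idle CPU. A stand-alone sketch of that decision order; the three helpers below are illustrative stand-ins, not the kernel implementations:

/* Sketch of the new wake-up CPU selection order; the predicates are stubs. */
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins, not the kernel's idle_cpu()/cpus_share_cache(). */
static bool idle_cpu(int cpu)               { return cpu == 2; }
static bool cpus_share_cache(int a, int b)  { return (a / 4) == (b / 4); }
static int  scan_idle_in_domain(int target) { return target; }

static int select_idle_sibling_sketch(int prev_cpu, int target)
{
	if (idle_cpu(target))
		return target;	/* wake-up target is already idle */

	/* previous CPU shares the last-level cache with the target and is idle */
	if (prev_cpu != target && cpus_share_cache(prev_cpu, target) &&
	    idle_cpu(prev_cpu))
		return prev_cpu;

	return scan_idle_in_domain(target);	/* fall back to scanning the domain */
}

int main(void)
{
	printf("%d\n", select_idle_sibling_sketch(2, 1));	/* -> 2 (previous CPU) */
	return 0;
}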
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 4f02b2847357..127a2c4cf4ab 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -7,6 +7,8 @@
 
 #include <linux/slab.h>
 
+int sched_rr_timeslice = RR_TIMESLICE;
+
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);
 
 struct rt_bandwidth def_rt_bandwidth;
@@ -925,8 +927,8 @@ static void update_curr_rt(struct rq *rq)
		return;
 
 	delta_exec = rq->clock_task - curr->se.exec_start;
-	if (unlikely((s64)delta_exec < 0))
-		delta_exec = 0;
+	if (unlikely((s64)delta_exec <= 0))
+		return;
 
 	schedstat_set(curr->se.statistics.exec_max,
		      max(curr->se.statistics.exec_max, delta_exec));
@@ -1427,8 +1429,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
-	    (p->nr_cpus_allowed > 1))
+	    cpumask_test_cpu(cpu, tsk_cpus_allowed(p)))
		return 1;
 	return 0;
 }
@@ -1889,8 +1890,11 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 	 * we may need to handle the pulling of RT tasks
 	 * now.
 	 */
-	if (p->on_rq && !rq->rt.rt_nr_running)
-		pull_rt_task(rq);
+	if (!p->on_rq || rq->rt.rt_nr_running)
+		return;
+
+	if (pull_rt_task(rq))
+		resched_task(rq->curr);
 }
 
 void init_sched_rt_class(void)
@@ -1985,7 +1989,11 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 	if (soft != RLIM_INFINITY) {
		unsigned long next;
 
-		p->rt.timeout++;
+		if (p->rt.watchdog_stamp != jiffies) {
+			p->rt.timeout++;
+			p->rt.watchdog_stamp = jiffies;
+		}
+
		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
		if (p->rt.timeout > next)
			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
@@ -2010,7 +2018,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	if (--p->rt.time_slice)
		return;
 
-	p->rt.time_slice = RR_TIMESLICE;
+	p->rt.time_slice = sched_rr_timeslice;
 
 	/*
 	 * Requeue to the end of queue if we (and all of our ancestors) are the
@@ -2041,7 +2049,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
 	 * Time slice is 0 for SCHED_FIFO tasks
 	 */
 	if (task->policy == SCHED_RR)
-		return RR_TIMESLICE;
+		return sched_rr_timeslice;
 	else
		return 0;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fc886441436a..cc03cfdf469f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,5 +1,7 @@
 
 #include <linux/sched.h>
+#include <linux/sched/sysctl.h>
+#include <linux/sched/rt.h>
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>