Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/auto_group.c |   3
-rw-r--r--  kernel/sched/core.c       | 159
-rw-r--r--  kernel/sched/cpupri.c     |   2
-rw-r--r--  kernel/sched/cputime.c    | 314
-rw-r--r--  kernel/sched/debug.c      | 101
-rw-r--r--  kernel/sched/fair.c       |  29
-rw-r--r--  kernel/sched/rt.c         |  28
-rw-r--r--  kernel/sched/sched.h      |   2
-rw-r--r--  kernel/sched/stats.c      |  79
9 files changed, 552 insertions, 165 deletions
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 0984a21076a3..64de5f8b0c9e 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -35,6 +35,7 @@ static inline void autogroup_destroy(struct kref *kref)
35 | ag->tg->rt_se = NULL; | 35 | ag->tg->rt_se = NULL; |
36 | ag->tg->rt_rq = NULL; | 36 | ag->tg->rt_rq = NULL; |
37 | #endif | 37 | #endif |
38 | sched_offline_group(ag->tg); | ||
38 | sched_destroy_group(ag->tg); | 39 | sched_destroy_group(ag->tg); |
39 | } | 40 | } |
40 | 41 | ||
@@ -76,6 +77,8 @@ static inline struct autogroup *autogroup_create(void) | |||
76 | if (IS_ERR(tg)) | 77 | if (IS_ERR(tg)) |
77 | goto out_free; | 78 | goto out_free; |
78 | 79 | ||
80 | sched_online_group(tg, &root_task_group); | ||
81 | |||
79 | kref_init(&ag->kref); | 82 | kref_init(&ag->kref); |
80 | init_rwsem(&ag->lock); | 83 | init_rwsem(&ag->lock); |
81 | ag->id = atomic_inc_return(&autogroup_seq_nr); | 84 | ag->id = atomic_inc_return(&autogroup_seq_nr); |
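The autogroup hunks above depend on the task_group lifecycle split introduced further down in core.c: creation no longer publishes the group, and destruction no longer unlinks it. A minimal sketch of the intended pairing, using only names visible in this diff (illustrative only, not part of the commit):

    /* Sketch: create/online on the way up, offline/destroy on the way down. */
    static struct task_group *example_autogroup_tg(void)
    {
            struct task_group *tg = sched_create_group(&root_task_group); /* allocate only */

            if (IS_ERR(tg))
                    return tg;
            sched_online_group(tg, &root_task_group);  /* publish to the scheduler lists */
            return tg;
    }

    static void example_autogroup_tg_done(struct task_group *tg)
    {
            sched_offline_group(tg);   /* unlink; must come before destroy */
            sched_destroy_group(tg);   /* freeing is deferred through call_rcu() */
    }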
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index d6fdcdcbb9b1..7f12624a393c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -83,7 +83,7 @@
83 | #endif | 83 | #endif |
84 | 84 | ||
85 | #include "sched.h" | 85 | #include "sched.h" |
86 | #include "../workqueue_sched.h" | 86 | #include "../workqueue_internal.h" |
87 | #include "../smpboot.h" | 87 | #include "../smpboot.h" |
88 | 88 | ||
89 | #define CREATE_TRACE_POINTS | 89 | #define CREATE_TRACE_POINTS |
@@ -1132,18 +1132,28 @@ EXPORT_SYMBOL_GPL(kick_process); | |||
1132 | */ | 1132 | */ |
1133 | static int select_fallback_rq(int cpu, struct task_struct *p) | 1133 | static int select_fallback_rq(int cpu, struct task_struct *p) |
1134 | { | 1134 | { |
1135 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); | 1135 | int nid = cpu_to_node(cpu); |
1136 | const struct cpumask *nodemask = NULL; | ||
1136 | enum { cpuset, possible, fail } state = cpuset; | 1137 | enum { cpuset, possible, fail } state = cpuset; |
1137 | int dest_cpu; | 1138 | int dest_cpu; |
1138 | 1139 | ||
1139 | /* Look for allowed, online CPU in same node. */ | 1140 | /* |
1140 | for_each_cpu(dest_cpu, nodemask) { | 1141 | * If the node that the cpu is on has been offlined, cpu_to_node() |
1141 | if (!cpu_online(dest_cpu)) | 1142 | * will return -1. There is no cpu on the node, and we should |
1142 | continue; | 1143 | * select the cpu on the other node. |
1143 | if (!cpu_active(dest_cpu)) | 1144 | */ |
1144 | continue; | 1145 | if (nid != -1) { |
1145 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) | 1146 | nodemask = cpumask_of_node(nid); |
1146 | return dest_cpu; | 1147 | |
1148 | /* Look for allowed, online CPU in same node. */ | ||
1149 | for_each_cpu(dest_cpu, nodemask) { | ||
1150 | if (!cpu_online(dest_cpu)) | ||
1151 | continue; | ||
1152 | if (!cpu_active(dest_cpu)) | ||
1153 | continue; | ||
1154 | if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p))) | ||
1155 | return dest_cpu; | ||
1156 | } | ||
1147 | } | 1157 | } |
1148 | 1158 | ||
1149 | for (;;) { | 1159 | for (;;) { |
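The rewritten block only walks the per-node mask when cpu_to_node() yields a valid node; a hot-removed node makes it return -1, and the function then falls through to the cpuset/possible-mask loop below. A small illustrative helper showing the same guard, assuming -1 here corresponds to NUMA_NO_NODE as in mainline:

    /* Sketch: no same-node candidates exist once the node has been offlined. */
    static const struct cpumask *example_fallback_node_mask(int cpu)
    {
            int nid = cpu_to_node(cpu);     /* -1 when the cpu's node is gone */

            if (nid == -1)
                    return NULL;            /* caller uses the cpuset/cpu_possible fallback */
            return cpumask_of_node(nid);
    }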
@@ -1523,7 +1533,8 @@ out: | |||
1523 | */ | 1533 | */ |
1524 | int wake_up_process(struct task_struct *p) | 1534 | int wake_up_process(struct task_struct *p) |
1525 | { | 1535 | { |
1526 | return try_to_wake_up(p, TASK_ALL, 0); | 1536 | WARN_ON(task_is_stopped_or_traced(p)); |
1537 | return try_to_wake_up(p, TASK_NORMAL, 0); | ||
1527 | } | 1538 | } |
1528 | EXPORT_SYMBOL(wake_up_process); | 1539 | EXPORT_SYMBOL(wake_up_process); |
1529 | 1540 | ||
@@ -1741,9 +1752,8 @@ EXPORT_SYMBOL_GPL(preempt_notifier_unregister); | |||
1741 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) | 1752 | static void fire_sched_in_preempt_notifiers(struct task_struct *curr) |
1742 | { | 1753 | { |
1743 | struct preempt_notifier *notifier; | 1754 | struct preempt_notifier *notifier; |
1744 | struct hlist_node *node; | ||
1745 | 1755 | ||
1746 | hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) | 1756 | hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) |
1747 | notifier->ops->sched_in(notifier, raw_smp_processor_id()); | 1757 | notifier->ops->sched_in(notifier, raw_smp_processor_id()); |
1748 | } | 1758 | } |
1749 | 1759 | ||
@@ -1752,9 +1762,8 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr, | |||
1752 | struct task_struct *next) | 1762 | struct task_struct *next) |
1753 | { | 1763 | { |
1754 | struct preempt_notifier *notifier; | 1764 | struct preempt_notifier *notifier; |
1755 | struct hlist_node *node; | ||
1756 | 1765 | ||
1757 | hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) | 1766 | hlist_for_each_entry(notifier, &curr->preempt_notifiers, link) |
1758 | notifier->ops->sched_out(notifier, next); | 1767 | notifier->ops->sched_out(notifier, next); |
1759 | } | 1768 | } |
1760 | 1769 | ||
@@ -1968,11 +1977,10 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
1968 | } | 1977 | } |
1969 | 1978 | ||
1970 | /* | 1979 | /* |
1971 | * nr_running, nr_uninterruptible and nr_context_switches: | 1980 | * nr_running and nr_context_switches: |
1972 | * | 1981 | * |
1973 | * externally visible scheduler statistics: current number of runnable | 1982 | * externally visible scheduler statistics: current number of runnable |
1974 | * threads, current number of uninterruptible-sleeping threads, total | 1983 | * threads, total number of context switches performed since bootup. |
1975 | * number of context switches performed since bootup. | ||
1976 | */ | 1984 | */ |
1977 | unsigned long nr_running(void) | 1985 | unsigned long nr_running(void) |
1978 | { | 1986 | { |
@@ -1984,23 +1992,6 @@ unsigned long nr_running(void) | |||
1984 | return sum; | 1992 | return sum; |
1985 | } | 1993 | } |
1986 | 1994 | ||
1987 | unsigned long nr_uninterruptible(void) | ||
1988 | { | ||
1989 | unsigned long i, sum = 0; | ||
1990 | |||
1991 | for_each_possible_cpu(i) | ||
1992 | sum += cpu_rq(i)->nr_uninterruptible; | ||
1993 | |||
1994 | /* | ||
1995 | * Since we read the counters lockless, it might be slightly | ||
1996 | * inaccurate. Do not allow it to go below zero though: | ||
1997 | */ | ||
1998 | if (unlikely((long)sum < 0)) | ||
1999 | sum = 0; | ||
2000 | |||
2001 | return sum; | ||
2002 | } | ||
2003 | |||
2004 | unsigned long long nr_context_switches(void) | 1995 | unsigned long long nr_context_switches(void) |
2005 | { | 1996 | { |
2006 | int i; | 1997 | int i; |
@@ -2785,7 +2776,7 @@ static noinline void __schedule_bug(struct task_struct *prev) | |||
2785 | if (irqs_disabled()) | 2776 | if (irqs_disabled()) |
2786 | print_irqtrace_events(prev); | 2777 | print_irqtrace_events(prev); |
2787 | dump_stack(); | 2778 | dump_stack(); |
2788 | add_taint(TAINT_WARN); | 2779 | add_taint(TAINT_WARN, LOCKDEP_STILL_OK); |
2789 | } | 2780 | } |
2790 | 2781 | ||
2791 | /* | 2782 | /* |
@@ -4410,20 +4401,32 @@ EXPORT_SYMBOL(yield); | |||
4410 | * It's the caller's job to ensure that the target task struct | 4401 | * It's the caller's job to ensure that the target task struct |
4411 | * can't go away on us before we can do any checks. | 4402 | * can't go away on us before we can do any checks. |
4412 | * | 4403 | * |
4413 | * Returns true if we indeed boosted the target task. | 4404 | * Returns: |
4405 | * true (>0) if we indeed boosted the target task. | ||
4406 | * false (0) if we failed to boost the target. | ||
4407 | * -ESRCH if there's no task to yield to. | ||
4414 | */ | 4408 | */ |
4415 | bool __sched yield_to(struct task_struct *p, bool preempt) | 4409 | bool __sched yield_to(struct task_struct *p, bool preempt) |
4416 | { | 4410 | { |
4417 | struct task_struct *curr = current; | 4411 | struct task_struct *curr = current; |
4418 | struct rq *rq, *p_rq; | 4412 | struct rq *rq, *p_rq; |
4419 | unsigned long flags; | 4413 | unsigned long flags; |
4420 | bool yielded = 0; | 4414 | int yielded = 0; |
4421 | 4415 | ||
4422 | local_irq_save(flags); | 4416 | local_irq_save(flags); |
4423 | rq = this_rq(); | 4417 | rq = this_rq(); |
4424 | 4418 | ||
4425 | again: | 4419 | again: |
4426 | p_rq = task_rq(p); | 4420 | p_rq = task_rq(p); |
4421 | /* | ||
4422 | * If we're the only runnable task on the rq and target rq also | ||
4423 | * has only one task, there's absolutely no point in yielding. | ||
4424 | */ | ||
4425 | if (rq->nr_running == 1 && p_rq->nr_running == 1) { | ||
4426 | yielded = -ESRCH; | ||
4427 | goto out_irq; | ||
4428 | } | ||
4429 | |||
4427 | double_rq_lock(rq, p_rq); | 4430 | double_rq_lock(rq, p_rq); |
4428 | while (task_rq(p) != p_rq) { | 4431 | while (task_rq(p) != p_rq) { |
4429 | double_rq_unlock(rq, p_rq); | 4432 | double_rq_unlock(rq, p_rq); |
@@ -4431,13 +4434,13 @@ again: | |||
4431 | } | 4434 | } |
4432 | 4435 | ||
4433 | if (!curr->sched_class->yield_to_task) | 4436 | if (!curr->sched_class->yield_to_task) |
4434 | goto out; | 4437 | goto out_unlock; |
4435 | 4438 | ||
4436 | if (curr->sched_class != p->sched_class) | 4439 | if (curr->sched_class != p->sched_class) |
4437 | goto out; | 4440 | goto out_unlock; |
4438 | 4441 | ||
4439 | if (task_running(p_rq, p) || p->state) | 4442 | if (task_running(p_rq, p) || p->state) |
4440 | goto out; | 4443 | goto out_unlock; |
4441 | 4444 | ||
4442 | yielded = curr->sched_class->yield_to_task(rq, p, preempt); | 4445 | yielded = curr->sched_class->yield_to_task(rq, p, preempt); |
4443 | if (yielded) { | 4446 | if (yielded) { |
@@ -4450,11 +4453,12 @@ again: | |||
4450 | resched_task(p_rq->curr); | 4453 | resched_task(p_rq->curr); |
4451 | } | 4454 | } |
4452 | 4455 | ||
4453 | out: | 4456 | out_unlock: |
4454 | double_rq_unlock(rq, p_rq); | 4457 | double_rq_unlock(rq, p_rq); |
4458 | out_irq: | ||
4455 | local_irq_restore(flags); | 4459 | local_irq_restore(flags); |
4456 | 4460 | ||
4457 | if (yielded) | 4461 | if (yielded > 0) |
4458 | schedule(); | 4462 | schedule(); |
4459 | 4463 | ||
4460 | return yielded; | 4464 | return yielded; |
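The hunk above turns yield_to()'s result into a three-way value: greater than zero when the target was boosted, zero when the yield failed, and -ESRCH when both runqueues only had one runnable task. A hedged sketch of a caller consuming the new convention (the in-tree caller lives on the KVM side and is not part of this diff; the helper name is hypothetical, and the sketch assumes the prototype carries the full int value even though the declaration shown above still reads bool):

    /* Sketch: stop scanning candidates once -ESRCH says yielding is pointless. */
    static void example_try_yield(struct task_struct *candidate)
    {
            int ret = yield_to(candidate, false);

            if (ret > 0)
                    return;                 /* boosted the candidate and rescheduled */
            if (ret == -ESRCH)
                    return;                 /* single-task runqueues: nothing to yield to */
            /* ret == 0: candidate not eligible, try the next one */
    }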
@@ -4713,6 +4717,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
4713 | */ | 4717 | */ |
4714 | idle->sched_class = &idle_sched_class; | 4718 | idle->sched_class = &idle_sched_class; |
4715 | ftrace_graph_init_idle_task(idle, cpu); | 4719 | ftrace_graph_init_idle_task(idle, cpu); |
4720 | vtime_init_idle(idle); | ||
4716 | #if defined(CONFIG_SMP) | 4721 | #if defined(CONFIG_SMP) |
4717 | sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); | 4722 | sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); |
4718 | #endif | 4723 | #endif |
@@ -7206,7 +7211,6 @@ static void free_sched_group(struct task_group *tg) | |||
7206 | struct task_group *sched_create_group(struct task_group *parent) | 7211 | struct task_group *sched_create_group(struct task_group *parent) |
7207 | { | 7212 | { |
7208 | struct task_group *tg; | 7213 | struct task_group *tg; |
7209 | unsigned long flags; | ||
7210 | 7214 | ||
7211 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | 7215 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); |
7212 | if (!tg) | 7216 | if (!tg) |
@@ -7218,6 +7222,17 @@ struct task_group *sched_create_group(struct task_group *parent) | |||
7218 | if (!alloc_rt_sched_group(tg, parent)) | 7222 | if (!alloc_rt_sched_group(tg, parent)) |
7219 | goto err; | 7223 | goto err; |
7220 | 7224 | ||
7225 | return tg; | ||
7226 | |||
7227 | err: | ||
7228 | free_sched_group(tg); | ||
7229 | return ERR_PTR(-ENOMEM); | ||
7230 | } | ||
7231 | |||
7232 | void sched_online_group(struct task_group *tg, struct task_group *parent) | ||
7233 | { | ||
7234 | unsigned long flags; | ||
7235 | |||
7221 | spin_lock_irqsave(&task_group_lock, flags); | 7236 | spin_lock_irqsave(&task_group_lock, flags); |
7222 | list_add_rcu(&tg->list, &task_groups); | 7237 | list_add_rcu(&tg->list, &task_groups); |
7223 | 7238 | ||
@@ -7227,12 +7242,6 @@ struct task_group *sched_create_group(struct task_group *parent) | |||
7227 | INIT_LIST_HEAD(&tg->children); | 7242 | INIT_LIST_HEAD(&tg->children); |
7228 | list_add_rcu(&tg->siblings, &parent->children); | 7243 | list_add_rcu(&tg->siblings, &parent->children); |
7229 | spin_unlock_irqrestore(&task_group_lock, flags); | 7244 | spin_unlock_irqrestore(&task_group_lock, flags); |
7230 | |||
7231 | return tg; | ||
7232 | |||
7233 | err: | ||
7234 | free_sched_group(tg); | ||
7235 | return ERR_PTR(-ENOMEM); | ||
7236 | } | 7245 | } |
7237 | 7246 | ||
7238 | /* rcu callback to free various structures associated with a task group */ | 7247 | /* rcu callback to free various structures associated with a task group */ |
@@ -7245,6 +7254,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp) | |||
7245 | /* Destroy runqueue etc associated with a task group */ | 7254 | /* Destroy runqueue etc associated with a task group */ |
7246 | void sched_destroy_group(struct task_group *tg) | 7255 | void sched_destroy_group(struct task_group *tg) |
7247 | { | 7256 | { |
7257 | /* wait for possible concurrent references to cfs_rqs complete */ | ||
7258 | call_rcu(&tg->rcu, free_sched_group_rcu); | ||
7259 | } | ||
7260 | |||
7261 | void sched_offline_group(struct task_group *tg) | ||
7262 | { | ||
7248 | unsigned long flags; | 7263 | unsigned long flags; |
7249 | int i; | 7264 | int i; |
7250 | 7265 | ||
@@ -7256,9 +7271,6 @@ void sched_destroy_group(struct task_group *tg) | |||
7256 | list_del_rcu(&tg->list); | 7271 | list_del_rcu(&tg->list); |
7257 | list_del_rcu(&tg->siblings); | 7272 | list_del_rcu(&tg->siblings); |
7258 | spin_unlock_irqrestore(&task_group_lock, flags); | 7273 | spin_unlock_irqrestore(&task_group_lock, flags); |
7259 | |||
7260 | /* wait for possible concurrent references to cfs_rqs complete */ | ||
7261 | call_rcu(&tg->rcu, free_sched_group_rcu); | ||
7262 | } | 7274 | } |
7263 | 7275 | ||
7264 | /* change task's runqueue when it moves between groups. | 7276 | /* change task's runqueue when it moves between groups. |
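sched_destroy_group() now only defers freeing through call_rcu(), while the unlinking moved into sched_offline_group(); the comment about "possible concurrent references to cfs_rqs" refers to lockless walkers of the task_groups list. A minimal sketch of the kind of reader that makes the RCU deferral necessary (illustrative, not from the commit):

    /* Sketch: an RCU reader that may still hold a task_group when destroy runs. */
    static void example_walk_task_groups(void)
    {
            struct task_group *tg;

            rcu_read_lock();
            list_for_each_entry_rcu(tg, &task_groups, list) {
                    /* tg and tg->cfs_rq[] stay valid until the grace period ends */
            }
            rcu_read_unlock();
    }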
@@ -7554,6 +7566,25 @@ static int sched_rt_global_constraints(void) | |||
7554 | } | 7566 | } |
7555 | #endif /* CONFIG_RT_GROUP_SCHED */ | 7567 | #endif /* CONFIG_RT_GROUP_SCHED */ |
7556 | 7568 | ||
7569 | int sched_rr_handler(struct ctl_table *table, int write, | ||
7570 | void __user *buffer, size_t *lenp, | ||
7571 | loff_t *ppos) | ||
7572 | { | ||
7573 | int ret; | ||
7574 | static DEFINE_MUTEX(mutex); | ||
7575 | |||
7576 | mutex_lock(&mutex); | ||
7577 | ret = proc_dointvec(table, write, buffer, lenp, ppos); | ||
7578 | /* make sure that internally we keep jiffies */ | ||
7579 | /* also, writing zero resets timeslice to default */ | ||
7580 | if (!ret && write) { | ||
7581 | sched_rr_timeslice = sched_rr_timeslice <= 0 ? | ||
7582 | RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice); | ||
7583 | } | ||
7584 | mutex_unlock(&mutex); | ||
7585 | return ret; | ||
7586 | } | ||
7587 | |||
7557 | int sched_rt_handler(struct ctl_table *table, int write, | 7588 | int sched_rt_handler(struct ctl_table *table, int write, |
7558 | void __user *buffer, size_t *lenp, | 7589 | void __user *buffer, size_t *lenp, |
7559 | loff_t *ppos) | 7590 | loff_t *ppos) |
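The new sched_rr_handler() keeps sched_rr_timeslice in jiffies internally although the value is written in milliseconds, and any write of zero (or a negative value) restores the built-in default. A worked example of the fixup, assuming HZ=1000 and the mainline RR_TIMESLICE default of 100 ms; the sysctl this handler serves (kernel.sched_rr_timeslice_ms) is wired up outside this file:

    /*
     * Worked example of the post-write fixup above (HZ assumed to be 1000):
     *   write 50 ms  -> sched_rr_timeslice = msecs_to_jiffies(50) = 50 jiffies
     *   write 0      -> sched_rr_timeslice = RR_TIMESLICE         = 100 jiffies (default)
     *   write -5     -> sched_rr_timeslice = RR_TIMESLICE           (<= 0 also resets)
     */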
@@ -7610,6 +7641,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp) | |||
7610 | return &tg->css; | 7641 | return &tg->css; |
7611 | } | 7642 | } |
7612 | 7643 | ||
7644 | static int cpu_cgroup_css_online(struct cgroup *cgrp) | ||
7645 | { | ||
7646 | struct task_group *tg = cgroup_tg(cgrp); | ||
7647 | struct task_group *parent; | ||
7648 | |||
7649 | if (!cgrp->parent) | ||
7650 | return 0; | ||
7651 | |||
7652 | parent = cgroup_tg(cgrp->parent); | ||
7653 | sched_online_group(tg, parent); | ||
7654 | return 0; | ||
7655 | } | ||
7656 | |||
7613 | static void cpu_cgroup_css_free(struct cgroup *cgrp) | 7657 | static void cpu_cgroup_css_free(struct cgroup *cgrp) |
7614 | { | 7658 | { |
7615 | struct task_group *tg = cgroup_tg(cgrp); | 7659 | struct task_group *tg = cgroup_tg(cgrp); |
@@ -7617,6 +7661,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp) | |||
7617 | sched_destroy_group(tg); | 7661 | sched_destroy_group(tg); |
7618 | } | 7662 | } |
7619 | 7663 | ||
7664 | static void cpu_cgroup_css_offline(struct cgroup *cgrp) | ||
7665 | { | ||
7666 | struct task_group *tg = cgroup_tg(cgrp); | ||
7667 | |||
7668 | sched_offline_group(tg); | ||
7669 | } | ||
7670 | |||
7620 | static int cpu_cgroup_can_attach(struct cgroup *cgrp, | 7671 | static int cpu_cgroup_can_attach(struct cgroup *cgrp, |
7621 | struct cgroup_taskset *tset) | 7672 | struct cgroup_taskset *tset) |
7622 | { | 7673 | { |
@@ -7972,6 +8023,8 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
7972 | .name = "cpu", | 8023 | .name = "cpu", |
7973 | .css_alloc = cpu_cgroup_css_alloc, | 8024 | .css_alloc = cpu_cgroup_css_alloc, |
7974 | .css_free = cpu_cgroup_css_free, | 8025 | .css_free = cpu_cgroup_css_free, |
8026 | .css_online = cpu_cgroup_css_online, | ||
8027 | .css_offline = cpu_cgroup_css_offline, | ||
7975 | .can_attach = cpu_cgroup_can_attach, | 8028 | .can_attach = cpu_cgroup_can_attach, |
7976 | .attach = cpu_cgroup_attach, | 8029 | .attach = cpu_cgroup_attach, |
7977 | .exit = cpu_cgroup_exit, | 8030 | .exit = cpu_cgroup_exit, |
diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
index 23aa789c53ee..1095e878a46f 100644
--- a/kernel/sched/cpupri.c
+++ b/kernel/sched/cpupri.c
@@ -28,6 +28,8 @@
28 | */ | 28 | */ |
29 | 29 | ||
30 | #include <linux/gfp.h> | 30 | #include <linux/gfp.h> |
31 | #include <linux/sched.h> | ||
32 | #include <linux/sched/rt.h> | ||
31 | #include "cpupri.h" | 33 | #include "cpupri.h" |
32 | 34 | ||
33 | /* Convert between a 140 based task->prio, and our 102 based cpupri */ | 35 | /* Convert between a 140 based task->prio, and our 102 based cpupri */ |
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 293b202fcf79..ed12cbb135f4 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -3,6 +3,7 @@
3 | #include <linux/tsacct_kern.h> | 3 | #include <linux/tsacct_kern.h> |
4 | #include <linux/kernel_stat.h> | 4 | #include <linux/kernel_stat.h> |
5 | #include <linux/static_key.h> | 5 | #include <linux/static_key.h> |
6 | #include <linux/context_tracking.h> | ||
6 | #include "sched.h" | 7 | #include "sched.h" |
7 | 8 | ||
8 | 9 | ||
@@ -163,7 +164,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime, | |||
163 | task_group_account_field(p, index, (__force u64) cputime); | 164 | task_group_account_field(p, index, (__force u64) cputime); |
164 | 165 | ||
165 | /* Account for user time used */ | 166 | /* Account for user time used */ |
166 | acct_update_integrals(p); | 167 | acct_account_cputime(p); |
167 | } | 168 | } |
168 | 169 | ||
169 | /* | 170 | /* |
@@ -213,7 +214,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime, | |||
213 | task_group_account_field(p, index, (__force u64) cputime); | 214 | task_group_account_field(p, index, (__force u64) cputime); |
214 | 215 | ||
215 | /* Account for system time used */ | 216 | /* Account for system time used */ |
216 | acct_update_integrals(p); | 217 | acct_account_cputime(p); |
217 | } | 218 | } |
218 | 219 | ||
219 | /* | 220 | /* |
@@ -295,6 +296,7 @@ static __always_inline bool steal_account_process_tick(void) | |||
295 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) | 296 | void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) |
296 | { | 297 | { |
297 | struct signal_struct *sig = tsk->signal; | 298 | struct signal_struct *sig = tsk->signal; |
299 | cputime_t utime, stime; | ||
298 | struct task_struct *t; | 300 | struct task_struct *t; |
299 | 301 | ||
300 | times->utime = sig->utime; | 302 | times->utime = sig->utime; |
@@ -308,16 +310,15 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) | |||
308 | 310 | ||
309 | t = tsk; | 311 | t = tsk; |
310 | do { | 312 | do { |
311 | times->utime += t->utime; | 313 | task_cputime(tsk, &utime, &stime); |
312 | times->stime += t->stime; | 314 | times->utime += utime; |
315 | times->stime += stime; | ||
313 | times->sum_exec_runtime += task_sched_runtime(t); | 316 | times->sum_exec_runtime += task_sched_runtime(t); |
314 | } while_each_thread(tsk, t); | 317 | } while_each_thread(tsk, t); |
315 | out: | 318 | out: |
316 | rcu_read_unlock(); | 319 | rcu_read_unlock(); |
317 | } | 320 | } |
318 | 321 | ||
319 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | ||
320 | |||
321 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 322 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
322 | /* | 323 | /* |
323 | * Account a tick to a process and cpustat | 324 | * Account a tick to a process and cpustat |
@@ -382,11 +383,12 @@ static void irqtime_account_idle_ticks(int ticks) | |||
382 | irqtime_account_process_tick(current, 0, rq); | 383 | irqtime_account_process_tick(current, 0, rq); |
383 | } | 384 | } |
384 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | 385 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ |
385 | static void irqtime_account_idle_ticks(int ticks) {} | 386 | static inline void irqtime_account_idle_ticks(int ticks) {} |
386 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | 387 | static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, |
387 | struct rq *rq) {} | 388 | struct rq *rq) {} |
388 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | 389 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
389 | 390 | ||
391 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE | ||
390 | /* | 392 | /* |
391 | * Account a single tick of cpu time. | 393 | * Account a single tick of cpu time. |
392 | * @p: the process that the cpu time gets accounted to | 394 | * @p: the process that the cpu time gets accounted to |
@@ -397,6 +399,9 @@ void account_process_tick(struct task_struct *p, int user_tick) | |||
397 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | 399 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); |
398 | struct rq *rq = this_rq(); | 400 | struct rq *rq = this_rq(); |
399 | 401 | ||
402 | if (vtime_accounting_enabled()) | ||
403 | return; | ||
404 | |||
400 | if (sched_clock_irqtime) { | 405 | if (sched_clock_irqtime) { |
401 | irqtime_account_process_tick(p, user_tick, rq); | 406 | irqtime_account_process_tick(p, user_tick, rq); |
402 | return; | 407 | return; |
@@ -438,8 +443,7 @@ void account_idle_ticks(unsigned long ticks) | |||
438 | 443 | ||
439 | account_idle_time(jiffies_to_cputime(ticks)); | 444 | account_idle_time(jiffies_to_cputime(ticks)); |
440 | } | 445 | } |
441 | 446 | #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ | |
442 | #endif | ||
443 | 447 | ||
444 | /* | 448 | /* |
445 | * Use precise platform statistics if available: | 449 | * Use precise platform statistics if available: |
@@ -461,25 +465,20 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime | |||
461 | *st = cputime.stime; | 465 | *st = cputime.stime; |
462 | } | 466 | } |
463 | 467 | ||
464 | void vtime_account_system_irqsafe(struct task_struct *tsk) | ||
465 | { | ||
466 | unsigned long flags; | ||
467 | |||
468 | local_irq_save(flags); | ||
469 | vtime_account_system(tsk); | ||
470 | local_irq_restore(flags); | ||
471 | } | ||
472 | EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe); | ||
473 | |||
474 | #ifndef __ARCH_HAS_VTIME_TASK_SWITCH | 468 | #ifndef __ARCH_HAS_VTIME_TASK_SWITCH |
475 | void vtime_task_switch(struct task_struct *prev) | 469 | void vtime_task_switch(struct task_struct *prev) |
476 | { | 470 | { |
471 | if (!vtime_accounting_enabled()) | ||
472 | return; | ||
473 | |||
477 | if (is_idle_task(prev)) | 474 | if (is_idle_task(prev)) |
478 | vtime_account_idle(prev); | 475 | vtime_account_idle(prev); |
479 | else | 476 | else |
480 | vtime_account_system(prev); | 477 | vtime_account_system(prev); |
481 | 478 | ||
479 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE | ||
482 | vtime_account_user(prev); | 480 | vtime_account_user(prev); |
481 | #endif | ||
483 | arch_vtime_task_switch(prev); | 482 | arch_vtime_task_switch(prev); |
484 | } | 483 | } |
485 | #endif | 484 | #endif |
@@ -493,27 +492,40 @@ void vtime_task_switch(struct task_struct *prev) | |||
493 | * vtime_account(). | 492 | * vtime_account(). |
494 | */ | 493 | */ |
495 | #ifndef __ARCH_HAS_VTIME_ACCOUNT | 494 | #ifndef __ARCH_HAS_VTIME_ACCOUNT |
496 | void vtime_account(struct task_struct *tsk) | 495 | void vtime_account_irq_enter(struct task_struct *tsk) |
497 | { | 496 | { |
498 | if (in_interrupt() || !is_idle_task(tsk)) | 497 | if (!vtime_accounting_enabled()) |
499 | vtime_account_system(tsk); | 498 | return; |
500 | else | 499 | |
501 | vtime_account_idle(tsk); | 500 | if (!in_interrupt()) { |
501 | /* | ||
502 | * If we interrupted user, context_tracking_in_user() | ||
503 | * is 1 because the context tracking don't hook | ||
504 | * on irq entry/exit. This way we know if | ||
505 | * we need to flush user time on kernel entry. | ||
506 | */ | ||
507 | if (context_tracking_in_user()) { | ||
508 | vtime_account_user(tsk); | ||
509 | return; | ||
510 | } | ||
511 | |||
512 | if (is_idle_task(tsk)) { | ||
513 | vtime_account_idle(tsk); | ||
514 | return; | ||
515 | } | ||
516 | } | ||
517 | vtime_account_system(tsk); | ||
502 | } | 518 | } |
503 | EXPORT_SYMBOL_GPL(vtime_account); | 519 | EXPORT_SYMBOL_GPL(vtime_account_irq_enter); |
504 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ | 520 | #endif /* __ARCH_HAS_VTIME_ACCOUNT */ |
505 | 521 | ||
506 | #else | 522 | #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ |
507 | |||
508 | #ifndef nsecs_to_cputime | ||
509 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) | ||
510 | #endif | ||
511 | 523 | ||
512 | static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) | 524 | static cputime_t scale_stime(cputime_t stime, cputime_t rtime, cputime_t total) |
513 | { | 525 | { |
514 | u64 temp = (__force u64) rtime; | 526 | u64 temp = (__force u64) rtime; |
515 | 527 | ||
516 | temp *= (__force u64) utime; | 528 | temp *= (__force u64) stime; |
517 | 529 | ||
518 | if (sizeof(cputime_t) == 4) | 530 | if (sizeof(cputime_t) == 4) |
519 | temp = div_u64(temp, (__force u32) total); | 531 | temp = div_u64(temp, (__force u32) total); |
@@ -531,10 +543,10 @@ static void cputime_adjust(struct task_cputime *curr, | |||
531 | struct cputime *prev, | 543 | struct cputime *prev, |
532 | cputime_t *ut, cputime_t *st) | 544 | cputime_t *ut, cputime_t *st) |
533 | { | 545 | { |
534 | cputime_t rtime, utime, total; | 546 | cputime_t rtime, stime, total; |
535 | 547 | ||
536 | utime = curr->utime; | 548 | stime = curr->stime; |
537 | total = utime + curr->stime; | 549 | total = stime + curr->utime; |
538 | 550 | ||
539 | /* | 551 | /* |
540 | * Tick based cputime accounting depend on random scheduling | 552 | * Tick based cputime accounting depend on random scheduling |
@@ -549,17 +561,17 @@ static void cputime_adjust(struct task_cputime *curr, | |||
549 | rtime = nsecs_to_cputime(curr->sum_exec_runtime); | 561 | rtime = nsecs_to_cputime(curr->sum_exec_runtime); |
550 | 562 | ||
551 | if (total) | 563 | if (total) |
552 | utime = scale_utime(utime, rtime, total); | 564 | stime = scale_stime(stime, rtime, total); |
553 | else | 565 | else |
554 | utime = rtime; | 566 | stime = rtime; |
555 | 567 | ||
556 | /* | 568 | /* |
557 | * If the tick based count grows faster than the scheduler one, | 569 | * If the tick based count grows faster than the scheduler one, |
558 | * the result of the scaling may go backward. | 570 | * the result of the scaling may go backward. |
559 | * Let's enforce monotonicity. | 571 | * Let's enforce monotonicity. |
560 | */ | 572 | */ |
561 | prev->utime = max(prev->utime, utime); | 573 | prev->stime = max(prev->stime, stime); |
562 | prev->stime = max(prev->stime, rtime - prev->utime); | 574 | prev->utime = max(prev->utime, rtime - prev->stime); |
563 | 575 | ||
564 | *ut = prev->utime; | 576 | *ut = prev->utime; |
565 | *st = prev->stime; | 577 | *st = prev->stime; |
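cputime_adjust() now scales the tick-sampled stime against the precise rtime and derives utime as the remainder, instead of scaling utime as before; the max() clamps keep both reported values monotonic across calls. A small worked example with made-up numbers:

    /*
     * Worked example (made-up numbers, one cputime unit per tick):
     *   curr->utime = 2, curr->stime = 6          -> total = 8
     *   rtime from sum_exec_runtime               = 12
     *   stime = scale_stime(6, 12, 8) = 6*12/8    = 9
     *   prev->stime = max(prev->stime, 9)         = 9   (assuming prev was smaller)
     *   prev->utime = max(prev->utime, 12 - 9)    = 3
     * The reported utime + stime equals rtime and never moves backwards.
     */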
@@ -568,11 +580,10 @@ static void cputime_adjust(struct task_cputime *curr, | |||
568 | void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) | 580 | void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) |
569 | { | 581 | { |
570 | struct task_cputime cputime = { | 582 | struct task_cputime cputime = { |
571 | .utime = p->utime, | ||
572 | .stime = p->stime, | ||
573 | .sum_exec_runtime = p->se.sum_exec_runtime, | 583 | .sum_exec_runtime = p->se.sum_exec_runtime, |
574 | }; | 584 | }; |
575 | 585 | ||
586 | task_cputime(p, &cputime.utime, &cputime.stime); | ||
576 | cputime_adjust(&cputime, &p->prev_cputime, ut, st); | 587 | cputime_adjust(&cputime, &p->prev_cputime, ut, st); |
577 | } | 588 | } |
578 | 589 | ||
@@ -586,4 +597,221 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime | |||
586 | thread_group_cputime(p, &cputime); | 597 | thread_group_cputime(p, &cputime); |
587 | cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); | 598 | cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); |
588 | } | 599 | } |
589 | #endif | 600 | #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ |
601 | |||
602 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN | ||
603 | static unsigned long long vtime_delta(struct task_struct *tsk) | ||
604 | { | ||
605 | unsigned long long clock; | ||
606 | |||
607 | clock = local_clock(); | ||
608 | if (clock < tsk->vtime_snap) | ||
609 | return 0; | ||
610 | |||
611 | return clock - tsk->vtime_snap; | ||
612 | } | ||
613 | |||
614 | static cputime_t get_vtime_delta(struct task_struct *tsk) | ||
615 | { | ||
616 | unsigned long long delta = vtime_delta(tsk); | ||
617 | |||
618 | WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING); | ||
619 | tsk->vtime_snap += delta; | ||
620 | |||
621 | /* CHECKME: always safe to convert nsecs to cputime? */ | ||
622 | return nsecs_to_cputime(delta); | ||
623 | } | ||
624 | |||
625 | static void __vtime_account_system(struct task_struct *tsk) | ||
626 | { | ||
627 | cputime_t delta_cpu = get_vtime_delta(tsk); | ||
628 | |||
629 | account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); | ||
630 | } | ||
631 | |||
632 | void vtime_account_system(struct task_struct *tsk) | ||
633 | { | ||
634 | if (!vtime_accounting_enabled()) | ||
635 | return; | ||
636 | |||
637 | write_seqlock(&tsk->vtime_seqlock); | ||
638 | __vtime_account_system(tsk); | ||
639 | write_sequnlock(&tsk->vtime_seqlock); | ||
640 | } | ||
641 | |||
642 | void vtime_account_irq_exit(struct task_struct *tsk) | ||
643 | { | ||
644 | if (!vtime_accounting_enabled()) | ||
645 | return; | ||
646 | |||
647 | write_seqlock(&tsk->vtime_seqlock); | ||
648 | if (context_tracking_in_user()) | ||
649 | tsk->vtime_snap_whence = VTIME_USER; | ||
650 | __vtime_account_system(tsk); | ||
651 | write_sequnlock(&tsk->vtime_seqlock); | ||
652 | } | ||
653 | |||
654 | void vtime_account_user(struct task_struct *tsk) | ||
655 | { | ||
656 | cputime_t delta_cpu; | ||
657 | |||
658 | if (!vtime_accounting_enabled()) | ||
659 | return; | ||
660 | |||
661 | delta_cpu = get_vtime_delta(tsk); | ||
662 | |||
663 | write_seqlock(&tsk->vtime_seqlock); | ||
664 | tsk->vtime_snap_whence = VTIME_SYS; | ||
665 | account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); | ||
666 | write_sequnlock(&tsk->vtime_seqlock); | ||
667 | } | ||
668 | |||
669 | void vtime_user_enter(struct task_struct *tsk) | ||
670 | { | ||
671 | if (!vtime_accounting_enabled()) | ||
672 | return; | ||
673 | |||
674 | write_seqlock(&tsk->vtime_seqlock); | ||
675 | tsk->vtime_snap_whence = VTIME_USER; | ||
676 | __vtime_account_system(tsk); | ||
677 | write_sequnlock(&tsk->vtime_seqlock); | ||
678 | } | ||
679 | |||
680 | void vtime_guest_enter(struct task_struct *tsk) | ||
681 | { | ||
682 | write_seqlock(&tsk->vtime_seqlock); | ||
683 | __vtime_account_system(tsk); | ||
684 | current->flags |= PF_VCPU; | ||
685 | write_sequnlock(&tsk->vtime_seqlock); | ||
686 | } | ||
687 | |||
688 | void vtime_guest_exit(struct task_struct *tsk) | ||
689 | { | ||
690 | write_seqlock(&tsk->vtime_seqlock); | ||
691 | __vtime_account_system(tsk); | ||
692 | current->flags &= ~PF_VCPU; | ||
693 | write_sequnlock(&tsk->vtime_seqlock); | ||
694 | } | ||
695 | |||
696 | void vtime_account_idle(struct task_struct *tsk) | ||
697 | { | ||
698 | cputime_t delta_cpu = get_vtime_delta(tsk); | ||
699 | |||
700 | account_idle_time(delta_cpu); | ||
701 | } | ||
702 | |||
703 | bool vtime_accounting_enabled(void) | ||
704 | { | ||
705 | return context_tracking_active(); | ||
706 | } | ||
707 | |||
708 | void arch_vtime_task_switch(struct task_struct *prev) | ||
709 | { | ||
710 | write_seqlock(&prev->vtime_seqlock); | ||
711 | prev->vtime_snap_whence = VTIME_SLEEPING; | ||
712 | write_sequnlock(&prev->vtime_seqlock); | ||
713 | |||
714 | write_seqlock(&current->vtime_seqlock); | ||
715 | current->vtime_snap_whence = VTIME_SYS; | ||
716 | current->vtime_snap = sched_clock(); | ||
717 | write_sequnlock(&current->vtime_seqlock); | ||
718 | } | ||
719 | |||
720 | void vtime_init_idle(struct task_struct *t) | ||
721 | { | ||
722 | unsigned long flags; | ||
723 | |||
724 | write_seqlock_irqsave(&t->vtime_seqlock, flags); | ||
725 | t->vtime_snap_whence = VTIME_SYS; | ||
726 | t->vtime_snap = sched_clock(); | ||
727 | write_sequnlock_irqrestore(&t->vtime_seqlock, flags); | ||
728 | } | ||
729 | |||
730 | cputime_t task_gtime(struct task_struct *t) | ||
731 | { | ||
732 | unsigned int seq; | ||
733 | cputime_t gtime; | ||
734 | |||
735 | do { | ||
736 | seq = read_seqbegin(&t->vtime_seqlock); | ||
737 | |||
738 | gtime = t->gtime; | ||
739 | if (t->flags & PF_VCPU) | ||
740 | gtime += vtime_delta(t); | ||
741 | |||
742 | } while (read_seqretry(&t->vtime_seqlock, seq)); | ||
743 | |||
744 | return gtime; | ||
745 | } | ||
746 | |||
747 | /* | ||
748 | * Fetch cputime raw values from fields of task_struct and | ||
749 | * add up the pending nohz execution time since the last | ||
750 | * cputime snapshot. | ||
751 | */ | ||
752 | static void | ||
753 | fetch_task_cputime(struct task_struct *t, | ||
754 | cputime_t *u_dst, cputime_t *s_dst, | ||
755 | cputime_t *u_src, cputime_t *s_src, | ||
756 | cputime_t *udelta, cputime_t *sdelta) | ||
757 | { | ||
758 | unsigned int seq; | ||
759 | unsigned long long delta; | ||
760 | |||
761 | do { | ||
762 | *udelta = 0; | ||
763 | *sdelta = 0; | ||
764 | |||
765 | seq = read_seqbegin(&t->vtime_seqlock); | ||
766 | |||
767 | if (u_dst) | ||
768 | *u_dst = *u_src; | ||
769 | if (s_dst) | ||
770 | *s_dst = *s_src; | ||
771 | |||
772 | /* Task is sleeping, nothing to add */ | ||
773 | if (t->vtime_snap_whence == VTIME_SLEEPING || | ||
774 | is_idle_task(t)) | ||
775 | continue; | ||
776 | |||
777 | delta = vtime_delta(t); | ||
778 | |||
779 | /* | ||
780 | * Task runs either in user or kernel space, add pending nohz time to | ||
781 | * the right place. | ||
782 | */ | ||
783 | if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) { | ||
784 | *udelta = delta; | ||
785 | } else { | ||
786 | if (t->vtime_snap_whence == VTIME_SYS) | ||
787 | *sdelta = delta; | ||
788 | } | ||
789 | } while (read_seqretry(&t->vtime_seqlock, seq)); | ||
790 | } | ||
791 | |||
792 | |||
793 | void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) | ||
794 | { | ||
795 | cputime_t udelta, sdelta; | ||
796 | |||
797 | fetch_task_cputime(t, utime, stime, &t->utime, | ||
798 | &t->stime, &udelta, &sdelta); | ||
799 | if (utime) | ||
800 | *utime += udelta; | ||
801 | if (stime) | ||
802 | *stime += sdelta; | ||
803 | } | ||
804 | |||
805 | void task_cputime_scaled(struct task_struct *t, | ||
806 | cputime_t *utimescaled, cputime_t *stimescaled) | ||
807 | { | ||
808 | cputime_t udelta, sdelta; | ||
809 | |||
810 | fetch_task_cputime(t, utimescaled, stimescaled, | ||
811 | &t->utimescaled, &t->stimescaled, &udelta, &sdelta); | ||
812 | if (utimescaled) | ||
813 | *utimescaled += cputime_to_scaled(udelta); | ||
814 | if (stimescaled) | ||
815 | *stimescaled += cputime_to_scaled(sdelta); | ||
816 | } | ||
817 | #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ | ||
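Under CONFIG_VIRT_CPU_ACCOUNTING_GEN the raw t->utime/t->stime no longer include the time elapsed since the last vtime snapshot, so readers go through the new task_cputime()/task_cputime_scaled() accessors, which retry on t->vtime_seqlock. A minimal sketch of a reader (illustrative helper, not part of the commit):

    /* Sketch: fetch a consistent utime/stime pair through the new accessor. */
    static cputime_t example_task_total_time(struct task_struct *t)
    {
            cputime_t utime, stime;

            task_cputime(t, &utime, &stime);  /* seqlock retry handles concurrent updates */
            return utime + stime;
    }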
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 2cd3c1b4e582..75024a673520 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -110,13 +110,6 @@ static char *task_group_path(struct task_group *tg)
110 | if (autogroup_path(tg, group_path, PATH_MAX)) | 110 | if (autogroup_path(tg, group_path, PATH_MAX)) |
111 | return group_path; | 111 | return group_path; |
112 | 112 | ||
113 | /* | ||
114 | * May be NULL if the underlying cgroup isn't fully-created yet | ||
115 | */ | ||
116 | if (!tg->css.cgroup) { | ||
117 | group_path[0] = '\0'; | ||
118 | return group_path; | ||
119 | } | ||
120 | cgroup_path(tg->css.cgroup, group_path, PATH_MAX); | 113 | cgroup_path(tg->css.cgroup, group_path, PATH_MAX); |
121 | return group_path; | 114 | return group_path; |
122 | } | 115 | } |
@@ -222,8 +215,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
222 | cfs_rq->runnable_load_avg); | 215 | cfs_rq->runnable_load_avg); |
223 | SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg", | 216 | SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg", |
224 | cfs_rq->blocked_load_avg); | 217 | cfs_rq->blocked_load_avg); |
225 | SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg", | 218 | SEQ_printf(m, " .%-30s: %lld\n", "tg_load_avg", |
226 | atomic64_read(&cfs_rq->tg->load_avg)); | 219 | (unsigned long long)atomic64_read(&cfs_rq->tg->load_avg)); |
227 | SEQ_printf(m, " .%-30s: %lld\n", "tg_load_contrib", | 220 | SEQ_printf(m, " .%-30s: %lld\n", "tg_load_contrib", |
228 | cfs_rq->tg_load_contrib); | 221 | cfs_rq->tg_load_contrib); |
229 | SEQ_printf(m, " .%-30s: %d\n", "tg_runnable_contrib", | 222 | SEQ_printf(m, " .%-30s: %d\n", "tg_runnable_contrib", |
@@ -269,11 +262,11 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
269 | { | 262 | { |
270 | unsigned int freq = cpu_khz ? : 1; | 263 | unsigned int freq = cpu_khz ? : 1; |
271 | 264 | ||
272 | SEQ_printf(m, "\ncpu#%d, %u.%03u MHz\n", | 265 | SEQ_printf(m, "cpu#%d, %u.%03u MHz\n", |
273 | cpu, freq / 1000, (freq % 1000)); | 266 | cpu, freq / 1000, (freq % 1000)); |
274 | } | 267 | } |
275 | #else | 268 | #else |
276 | SEQ_printf(m, "\ncpu#%d\n", cpu); | 269 | SEQ_printf(m, "cpu#%d\n", cpu); |
277 | #endif | 270 | #endif |
278 | 271 | ||
279 | #define P(x) \ | 272 | #define P(x) \ |
@@ -330,6 +323,7 @@ do { \ | |||
330 | print_rq(m, rq, cpu); | 323 | print_rq(m, rq, cpu); |
331 | rcu_read_unlock(); | 324 | rcu_read_unlock(); |
332 | spin_unlock_irqrestore(&sched_debug_lock, flags); | 325 | spin_unlock_irqrestore(&sched_debug_lock, flags); |
326 | SEQ_printf(m, "\n"); | ||
333 | } | 327 | } |
334 | 328 | ||
335 | static const char *sched_tunable_scaling_names[] = { | 329 | static const char *sched_tunable_scaling_names[] = { |
@@ -338,11 +332,10 @@ static const char *sched_tunable_scaling_names[] = { | |||
338 | "linear" | 332 | "linear" |
339 | }; | 333 | }; |
340 | 334 | ||
341 | static int sched_debug_show(struct seq_file *m, void *v) | 335 | static void sched_debug_header(struct seq_file *m) |
342 | { | 336 | { |
343 | u64 ktime, sched_clk, cpu_clk; | 337 | u64 ktime, sched_clk, cpu_clk; |
344 | unsigned long flags; | 338 | unsigned long flags; |
345 | int cpu; | ||
346 | 339 | ||
347 | local_irq_save(flags); | 340 | local_irq_save(flags); |
348 | ktime = ktime_to_ns(ktime_get()); | 341 | ktime = ktime_to_ns(ktime_get()); |
@@ -384,33 +377,101 @@ static int sched_debug_show(struct seq_file *m, void *v) | |||
384 | #undef PN | 377 | #undef PN |
385 | #undef P | 378 | #undef P |
386 | 379 | ||
387 | SEQ_printf(m, " .%-40s: %d (%s)\n", "sysctl_sched_tunable_scaling", | 380 | SEQ_printf(m, " .%-40s: %d (%s)\n", |
381 | "sysctl_sched_tunable_scaling", | ||
388 | sysctl_sched_tunable_scaling, | 382 | sysctl_sched_tunable_scaling, |
389 | sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); | 383 | sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); |
384 | SEQ_printf(m, "\n"); | ||
385 | } | ||
390 | 386 | ||
391 | for_each_online_cpu(cpu) | 387 | static int sched_debug_show(struct seq_file *m, void *v) |
392 | print_cpu(m, cpu); | 388 | { |
389 | int cpu = (unsigned long)(v - 2); | ||
393 | 390 | ||
394 | SEQ_printf(m, "\n"); | 391 | if (cpu != -1) |
392 | print_cpu(m, cpu); | ||
393 | else | ||
394 | sched_debug_header(m); | ||
395 | 395 | ||
396 | return 0; | 396 | return 0; |
397 | } | 397 | } |
398 | 398 | ||
399 | void sysrq_sched_debug_show(void) | 399 | void sysrq_sched_debug_show(void) |
400 | { | 400 | { |
401 | sched_debug_show(NULL, NULL); | 401 | int cpu; |
402 | |||
403 | sched_debug_header(NULL); | ||
404 | for_each_online_cpu(cpu) | ||
405 | print_cpu(NULL, cpu); | ||
406 | |||
407 | } | ||
408 | |||
409 | /* | ||
410 | * This itererator needs some explanation. | ||
411 | * It returns 1 for the header position. | ||
412 | * This means 2 is cpu 0. | ||
413 | * In a hotplugged system some cpus, including cpu 0, may be missing so we have | ||
414 | * to use cpumask_* to iterate over the cpus. | ||
415 | */ | ||
416 | static void *sched_debug_start(struct seq_file *file, loff_t *offset) | ||
417 | { | ||
418 | unsigned long n = *offset; | ||
419 | |||
420 | if (n == 0) | ||
421 | return (void *) 1; | ||
422 | |||
423 | n--; | ||
424 | |||
425 | if (n > 0) | ||
426 | n = cpumask_next(n - 1, cpu_online_mask); | ||
427 | else | ||
428 | n = cpumask_first(cpu_online_mask); | ||
429 | |||
430 | *offset = n + 1; | ||
431 | |||
432 | if (n < nr_cpu_ids) | ||
433 | return (void *)(unsigned long)(n + 2); | ||
434 | return NULL; | ||
435 | } | ||
436 | |||
437 | static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset) | ||
438 | { | ||
439 | (*offset)++; | ||
440 | return sched_debug_start(file, offset); | ||
441 | } | ||
442 | |||
443 | static void sched_debug_stop(struct seq_file *file, void *data) | ||
444 | { | ||
445 | } | ||
446 | |||
447 | static const struct seq_operations sched_debug_sops = { | ||
448 | .start = sched_debug_start, | ||
449 | .next = sched_debug_next, | ||
450 | .stop = sched_debug_stop, | ||
451 | .show = sched_debug_show, | ||
452 | }; | ||
453 | |||
454 | static int sched_debug_release(struct inode *inode, struct file *file) | ||
455 | { | ||
456 | seq_release(inode, file); | ||
457 | |||
458 | return 0; | ||
402 | } | 459 | } |
403 | 460 | ||
404 | static int sched_debug_open(struct inode *inode, struct file *filp) | 461 | static int sched_debug_open(struct inode *inode, struct file *filp) |
405 | { | 462 | { |
406 | return single_open(filp, sched_debug_show, NULL); | 463 | int ret = 0; |
464 | |||
465 | ret = seq_open(filp, &sched_debug_sops); | ||
466 | |||
467 | return ret; | ||
407 | } | 468 | } |
408 | 469 | ||
409 | static const struct file_operations sched_debug_fops = { | 470 | static const struct file_operations sched_debug_fops = { |
410 | .open = sched_debug_open, | 471 | .open = sched_debug_open, |
411 | .read = seq_read, | 472 | .read = seq_read, |
412 | .llseek = seq_lseek, | 473 | .llseek = seq_lseek, |
413 | .release = single_release, | 474 | .release = sched_debug_release, |
414 | }; | 475 | }; |
415 | 476 | ||
416 | static int __init init_sched_debug_procfs(void) | 477 | static int __init init_sched_debug_procfs(void) |
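Both the sched_debug iterator above and the schedstat iterator below encode the seq_file position the same way: offset 0 is the header, and online cpus are handed to show() as the token cpu+2 so that the header token 1 and NULL stay distinct. A worked trace, assuming a hypothetical machine where cpu1 has been hot-removed:

    /*
     * sched_debug_start() with cpus {0, 2, 3} online:
     *   *offset 0 -> token (void *)1                   -> show() prints the header
     *   *offset 1 -> cpumask_first() == 0  -> token 2  -> show() prints cpu#0 (*offset left at 1)
     *   *offset 2 -> cpumask_next(0) == 2  -> token 4  -> show() prints cpu#2 (*offset set to 3)
     *   *offset 4 -> cpumask_next(2) == 3  -> token 5  -> show() prints cpu#3 (*offset set to 4)
     *   *offset 5 -> past the last online cpu          -> NULL, iteration stops
     * show()/show_schedstat() decode the token as cpu = (unsigned long)(v - 2).
     */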
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5eea8707234a..7a33e5986fc5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1680,9 +1680,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
1680 | } | 1680 | } |
1681 | 1681 | ||
1682 | /* ensure we never gain time by being placed backwards. */ | 1682 | /* ensure we never gain time by being placed backwards. */ |
1683 | vruntime = max_vruntime(se->vruntime, vruntime); | 1683 | se->vruntime = max_vruntime(se->vruntime, vruntime); |
1684 | |||
1685 | se->vruntime = vruntime; | ||
1686 | } | 1684 | } |
1687 | 1685 | ||
1688 | static void check_enqueue_throttle(struct cfs_rq *cfs_rq); | 1686 | static void check_enqueue_throttle(struct cfs_rq *cfs_rq); |
@@ -2663,7 +2661,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) | |||
2663 | hrtimer_cancel(&cfs_b->slack_timer); | 2661 | hrtimer_cancel(&cfs_b->slack_timer); |
2664 | } | 2662 | } |
2665 | 2663 | ||
2666 | static void unthrottle_offline_cfs_rqs(struct rq *rq) | 2664 | static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) |
2667 | { | 2665 | { |
2668 | struct cfs_rq *cfs_rq; | 2666 | struct cfs_rq *cfs_rq; |
2669 | 2667 | ||
@@ -3254,25 +3252,18 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | |||
3254 | */ | 3252 | */ |
3255 | static int select_idle_sibling(struct task_struct *p, int target) | 3253 | static int select_idle_sibling(struct task_struct *p, int target) |
3256 | { | 3254 | { |
3257 | int cpu = smp_processor_id(); | ||
3258 | int prev_cpu = task_cpu(p); | ||
3259 | struct sched_domain *sd; | 3255 | struct sched_domain *sd; |
3260 | struct sched_group *sg; | 3256 | struct sched_group *sg; |
3261 | int i; | 3257 | int i = task_cpu(p); |
3262 | 3258 | ||
3263 | /* | 3259 | if (idle_cpu(target)) |
3264 | * If the task is going to be woken-up on this cpu and if it is | 3260 | return target; |
3265 | * already idle, then it is the right target. | ||
3266 | */ | ||
3267 | if (target == cpu && idle_cpu(cpu)) | ||
3268 | return cpu; | ||
3269 | 3261 | ||
3270 | /* | 3262 | /* |
3271 | * If the task is going to be woken-up on the cpu where it previously | 3263 | * If the prevous cpu is cache affine and idle, don't be stupid. |
3272 | * ran and if it is currently idle, then it the right target. | ||
3273 | */ | 3264 | */ |
3274 | if (target == prev_cpu && idle_cpu(prev_cpu)) | 3265 | if (i != target && cpus_share_cache(i, target) && idle_cpu(i)) |
3275 | return prev_cpu; | 3266 | return i; |
3276 | 3267 | ||
3277 | /* | 3268 | /* |
3278 | * Otherwise, iterate the domains and find an elegible idle cpu. | 3269 | * Otherwise, iterate the domains and find an elegible idle cpu. |
@@ -3286,7 +3277,7 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
3286 | goto next; | 3277 | goto next; |
3287 | 3278 | ||
3288 | for_each_cpu(i, sched_group_cpus(sg)) { | 3279 | for_each_cpu(i, sched_group_cpus(sg)) { |
3289 | if (!idle_cpu(i)) | 3280 | if (i == target || !idle_cpu(i)) |
3290 | goto next; | 3281 | goto next; |
3291 | } | 3282 | } |
3292 | 3283 | ||
@@ -6101,7 +6092,7 @@ static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task | |||
6101 | * idle runqueue: | 6092 | * idle runqueue: |
6102 | */ | 6093 | */ |
6103 | if (rq->cfs.load.weight) | 6094 | if (rq->cfs.load.weight) |
6104 | rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); | 6095 | rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se)); |
6105 | 6096 | ||
6106 | return rr_interval; | 6097 | return rr_interval; |
6107 | } | 6098 | } |
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 418feb01344e..127a2c4cf4ab 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -7,6 +7,8 @@
7 | 7 | ||
8 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
9 | 9 | ||
10 | int sched_rr_timeslice = RR_TIMESLICE; | ||
11 | |||
10 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); | 12 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); |
11 | 13 | ||
12 | struct rt_bandwidth def_rt_bandwidth; | 14 | struct rt_bandwidth def_rt_bandwidth; |
@@ -566,7 +568,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | |||
566 | static int do_balance_runtime(struct rt_rq *rt_rq) | 568 | static int do_balance_runtime(struct rt_rq *rt_rq) |
567 | { | 569 | { |
568 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | 570 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); |
569 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; | 571 | struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd; |
570 | int i, weight, more = 0; | 572 | int i, weight, more = 0; |
571 | u64 rt_period; | 573 | u64 rt_period; |
572 | 574 | ||
@@ -925,8 +927,8 @@ static void update_curr_rt(struct rq *rq) | |||
925 | return; | 927 | return; |
926 | 928 | ||
927 | delta_exec = rq->clock_task - curr->se.exec_start; | 929 | delta_exec = rq->clock_task - curr->se.exec_start; |
928 | if (unlikely((s64)delta_exec < 0)) | 930 | if (unlikely((s64)delta_exec <= 0)) |
929 | delta_exec = 0; | 931 | return; |
930 | 932 | ||
931 | schedstat_set(curr->se.statistics.exec_max, | 933 | schedstat_set(curr->se.statistics.exec_max, |
932 | max(curr->se.statistics.exec_max, delta_exec)); | 934 | max(curr->se.statistics.exec_max, delta_exec)); |
@@ -1427,8 +1429,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | |||
1427 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) | 1429 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) |
1428 | { | 1430 | { |
1429 | if (!task_running(rq, p) && | 1431 | if (!task_running(rq, p) && |
1430 | (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && | 1432 | cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) |
1431 | (p->nr_cpus_allowed > 1)) | ||
1432 | return 1; | 1433 | return 1; |
1433 | return 0; | 1434 | return 0; |
1434 | } | 1435 | } |
@@ -1889,8 +1890,11 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) | |||
1889 | * we may need to handle the pulling of RT tasks | 1890 | * we may need to handle the pulling of RT tasks |
1890 | * now. | 1891 | * now. |
1891 | */ | 1892 | */ |
1892 | if (p->on_rq && !rq->rt.rt_nr_running) | 1893 | if (!p->on_rq || rq->rt.rt_nr_running) |
1893 | pull_rt_task(rq); | 1894 | return; |
1895 | |||
1896 | if (pull_rt_task(rq)) | ||
1897 | resched_task(rq->curr); | ||
1894 | } | 1898 | } |
1895 | 1899 | ||
1896 | void init_sched_rt_class(void) | 1900 | void init_sched_rt_class(void) |
@@ -1985,7 +1989,11 @@ static void watchdog(struct rq *rq, struct task_struct *p) | |||
1985 | if (soft != RLIM_INFINITY) { | 1989 | if (soft != RLIM_INFINITY) { |
1986 | unsigned long next; | 1990 | unsigned long next; |
1987 | 1991 | ||
1988 | p->rt.timeout++; | 1992 | if (p->rt.watchdog_stamp != jiffies) { |
1993 | p->rt.timeout++; | ||
1994 | p->rt.watchdog_stamp = jiffies; | ||
1995 | } | ||
1996 | |||
1989 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); | 1997 | next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); |
1990 | if (p->rt.timeout > next) | 1998 | if (p->rt.timeout > next) |
1991 | p->cputime_expires.sched_exp = p->se.sum_exec_runtime; | 1999 | p->cputime_expires.sched_exp = p->se.sum_exec_runtime; |
@@ -2010,7 +2018,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) | |||
2010 | if (--p->rt.time_slice) | 2018 | if (--p->rt.time_slice) |
2011 | return; | 2019 | return; |
2012 | 2020 | ||
2013 | p->rt.time_slice = RR_TIMESLICE; | 2021 | p->rt.time_slice = sched_rr_timeslice; |
2014 | 2022 | ||
2015 | /* | 2023 | /* |
2016 | * Requeue to the end of queue if we (and all of our ancestors) are the | 2024 | * Requeue to the end of queue if we (and all of our ancestors) are the |
@@ -2041,7 +2049,7 @@ static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) | |||
2041 | * Time slice is 0 for SCHED_FIFO tasks | 2049 | * Time slice is 0 for SCHED_FIFO tasks |
2042 | */ | 2050 | */ |
2043 | if (task->policy == SCHED_RR) | 2051 | if (task->policy == SCHED_RR) |
2044 | return RR_TIMESLICE; | 2052 | return sched_rr_timeslice; |
2045 | else | 2053 | else |
2046 | return 0; | 2054 | return 0; |
2047 | } | 2055 | } |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fc886441436a..cc03cfdf469f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1,5 +1,7 @@
1 | 1 | ||
2 | #include <linux/sched.h> | 2 | #include <linux/sched.h> |
3 | #include <linux/sched/sysctl.h> | ||
4 | #include <linux/sched/rt.h> | ||
3 | #include <linux/mutex.h> | 5 | #include <linux/mutex.h> |
4 | #include <linux/spinlock.h> | 6 | #include <linux/spinlock.h> |
5 | #include <linux/stop_machine.h> | 7 | #include <linux/stop_machine.h> |
diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c
index 903ffa9e8872..e036eda1a9c9 100644
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -21,14 +21,17 @@ static int show_schedstat(struct seq_file *seq, void *v)
21 | if (mask_str == NULL) | 21 | if (mask_str == NULL) |
22 | return -ENOMEM; | 22 | return -ENOMEM; |
23 | 23 | ||
24 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); | 24 | if (v == (void *)1) { |
25 | seq_printf(seq, "timestamp %lu\n", jiffies); | 25 | seq_printf(seq, "version %d\n", SCHEDSTAT_VERSION); |
26 | for_each_online_cpu(cpu) { | 26 | seq_printf(seq, "timestamp %lu\n", jiffies); |
27 | struct rq *rq = cpu_rq(cpu); | 27 | } else { |
28 | struct rq *rq; | ||
28 | #ifdef CONFIG_SMP | 29 | #ifdef CONFIG_SMP |
29 | struct sched_domain *sd; | 30 | struct sched_domain *sd; |
30 | int dcount = 0; | 31 | int dcount = 0; |
31 | #endif | 32 | #endif |
33 | cpu = (unsigned long)(v - 2); | ||
34 | rq = cpu_rq(cpu); | ||
32 | 35 | ||
33 | /* runqueue-specific stats */ | 36 | /* runqueue-specific stats */ |
34 | seq_printf(seq, | 37 | seq_printf(seq, |
@@ -77,30 +80,66 @@ static int show_schedstat(struct seq_file *seq, void *v) | |||
77 | return 0; | 80 | return 0; |
78 | } | 81 | } |
79 | 82 | ||
80 | static int schedstat_open(struct inode *inode, struct file *file) | 83 | /* |
84 | * This itererator needs some explanation. | ||
85 | * It returns 1 for the header position. | ||
86 | * This means 2 is cpu 0. | ||
87 | * In a hotplugged system some cpus, including cpu 0, may be missing so we have | ||
88 | * to use cpumask_* to iterate over the cpus. | ||
89 | */ | ||
90 | static void *schedstat_start(struct seq_file *file, loff_t *offset) | ||
81 | { | 91 | { |
82 | unsigned int size = PAGE_SIZE * (1 + num_online_cpus() / 32); | 92 | unsigned long n = *offset; |
83 | char *buf = kmalloc(size, GFP_KERNEL); | ||
84 | struct seq_file *m; | ||
85 | int res; | ||
86 | 93 | ||
87 | if (!buf) | 94 | if (n == 0) |
88 | return -ENOMEM; | 95 | return (void *) 1; |
89 | res = single_open(file, show_schedstat, NULL); | 96 | |
90 | if (!res) { | 97 | n--; |
91 | m = file->private_data; | 98 | |
92 | m->buf = buf; | 99 | if (n > 0) |
93 | m->size = size; | 100 | n = cpumask_next(n - 1, cpu_online_mask); |
94 | } else | 101 | else |
95 | kfree(buf); | 102 | n = cpumask_first(cpu_online_mask); |
96 | return res; | 103 | |
104 | *offset = n + 1; | ||
105 | |||
106 | if (n < nr_cpu_ids) | ||
107 | return (void *)(unsigned long)(n + 2); | ||
108 | return NULL; | ||
109 | } | ||
110 | |||
111 | static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset) | ||
112 | { | ||
113 | (*offset)++; | ||
114 | return schedstat_start(file, offset); | ||
115 | } | ||
116 | |||
117 | static void schedstat_stop(struct seq_file *file, void *data) | ||
118 | { | ||
119 | } | ||
120 | |||
121 | static const struct seq_operations schedstat_sops = { | ||
122 | .start = schedstat_start, | ||
123 | .next = schedstat_next, | ||
124 | .stop = schedstat_stop, | ||
125 | .show = show_schedstat, | ||
126 | }; | ||
127 | |||
128 | static int schedstat_open(struct inode *inode, struct file *file) | ||
129 | { | ||
130 | return seq_open(file, &schedstat_sops); | ||
97 | } | 131 | } |
98 | 132 | ||
133 | static int schedstat_release(struct inode *inode, struct file *file) | ||
134 | { | ||
135 | return 0; | ||
136 | }; | ||
137 | |||
99 | static const struct file_operations proc_schedstat_operations = { | 138 | static const struct file_operations proc_schedstat_operations = { |
100 | .open = schedstat_open, | 139 | .open = schedstat_open, |
101 | .read = seq_read, | 140 | .read = seq_read, |
102 | .llseek = seq_lseek, | 141 | .llseek = seq_lseek, |
103 | .release = single_release, | 142 | .release = schedstat_release, |
104 | }; | 143 | }; |
105 | 144 | ||
106 | static int __init proc_schedstat_init(void) | 145 | static int __init proc_schedstat_init(void) |