Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--	kernel/sched/core.c	146
1 file changed, 115 insertions(+), 31 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d8927fda712..257002c13bb0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -72,6 +72,7 @@
 #include <linux/slab.h>
 #include <linux/init_task.h>
 #include <linux/binfmts.h>
+#include <linux/context_tracking.h>
 
 #include <asm/switch_to.h>
 #include <asm/tlb.h>
@@ -192,23 +193,10 @@ static void sched_feat_disable(int i) { };
 static void sched_feat_enable(int i) { };
 #endif /* HAVE_JUMP_LABEL */
 
-static ssize_t
-sched_feat_write(struct file *filp, const char __user *ubuf,
-		size_t cnt, loff_t *ppos)
+static int sched_feat_set(char *cmp)
 {
-	char buf[64];
-	char *cmp;
-	int neg = 0;
 	int i;
-
-	if (cnt > 63)
-		cnt = 63;
-
-	if (copy_from_user(&buf, ubuf, cnt))
-		return -EFAULT;
-
-	buf[cnt] = 0;
-	cmp = strstrip(buf);
+	int neg = 0;
 
 	if (strncmp(cmp, "NO_", 3) == 0) {
 		neg = 1;
@@ -228,6 +216,27 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 	}
 	}
 
+	return i;
+}
+
+static ssize_t
+sched_feat_write(struct file *filp, const char __user *ubuf,
+		size_t cnt, loff_t *ppos)
+{
+	char buf[64];
+	char *cmp;
+	int i;
+
+	if (cnt > 63)
+		cnt = 63;
+
+	if (copy_from_user(&buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+	cmp = strstrip(buf);
+
+	i = sched_feat_set(cmp);
 	if (i == __SCHED_FEAT_NR)
 		return -EINVAL;
 
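The two hunks above split the feature parser out of the debugfs write handler: sched_feat_set() now does the NO_-prefix handling and bit flipping, while sched_feat_write() is reduced to copying and trimming the user buffer before delegating. This lets kernel code toggle scheduler features by name without going through debugfs. A minimal sketch of an in-kernel caller (the wrapper name is illustrative, not part of the patch):

	/* Sketch: flip a scheduler feature by name from kernel code.
	 * sched_feat_set() returns __SCHED_FEAT_NR for an unknown name,
	 * otherwise the index of the feature it toggled. */
	static int demo_set_feature(char *name)
	{
		if (sched_feat_set(name) == __SCHED_FEAT_NR)
			return -EINVAL;
		return 0;
	}

The NUMA-balancing hunk later in this diff uses exactly this entry point: set_numabalancing_state() calls sched_feat_set("NUMA") or sched_feat_set("NO_NUMA").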
@@ -922,6 +931,13 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 		rq->skip_clock_update = 1;
 }
 
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+	atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
@@ -952,8 +968,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	trace_sched_migrate_task(p, new_cpu);
 
 	if (task_cpu(p) != new_cpu) {
+		struct task_migration_notifier tmn;
+
+		if (p->sched_class->migrate_task_rq)
+			p->sched_class->migrate_task_rq(p, new_cpu);
 		p->se.nr_migrations++;
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
+
+		tmn.task = p;
+		tmn.from_cpu = task_cpu(p);
+		tmn.to_cpu = new_cpu;
+
+		atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
 	}
 
 	__set_task_cpu(p, new_cpu);
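Together, these two hunks publish task migrations on an atomic notifier chain: register_task_migration_notifier() adds a subscriber, and set_task_cpu() fills a struct task_migration_notifier with the task plus source and destination CPUs. Note the ordering: tmn.from_cpu reads task_cpu(p) before __set_task_cpu() rewrites it. A minimal subscriber sketch, assuming only what this diff introduces (the demo_* names are illustrative):

	#include <linux/notifier.h>

	static int demo_migration_cb(struct notifier_block *nb,
				     unsigned long action, void *data)
	{
		struct task_migration_notifier *tmn = data;

		pr_debug("task %d: CPU %d -> CPU %d\n",
			 task_pid_nr(tmn->task), tmn->from_cpu, tmn->to_cpu);
		return NOTIFY_OK;
	}

	static struct notifier_block demo_migration_nb = {
		.notifier_call = demo_migration_cb,
	};

	/* in some init path: */
	register_task_migration_notifier(&demo_migration_nb);

Since the chain is atomic and fires from the scheduler's migration path, callbacks must not sleep.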
@@ -1524,6 +1550,15 @@ static void __sched_fork(struct task_struct *p)
 	p->se.vruntime			= 0;
 	INIT_LIST_HEAD(&p->se.group_node);
 
+/*
+ * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
+ * removed when useful for applications beyond shares distribution (e.g.
+ * load-balance).
+ */
+#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
+	p->se.avg.runnable_avg_period = 0;
+	p->se.avg.runnable_avg_sum = 0;
+#endif
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
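This hunk zeroes the per-entity load-tracking accumulators at fork so a new task starts with a fresh average rather than inheriting stale state. The two fields feed a decayed ratio, roughly runnable_avg_sum / runnable_avg_period: the fraction of recent, geometrically weighted time the entity was runnable. A sketch of that ratio, assuming the field types from this series (helper name illustrative):

	/* Sketch: percentage of tracked time the entity was runnable.
	 * Guards against the period still being zero right after fork. */
	static inline u32 demo_runnable_pct(struct sched_entity *se)
	{
		if (!se->avg.runnable_avg_period)
			return 0;
		return se->avg.runnable_avg_sum * 100 /
		       se->avg.runnable_avg_period;
	}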
@@ -1533,7 +1568,40 @@ static void __sched_fork(struct task_struct *p)
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
+
+#ifdef CONFIG_NUMA_BALANCING
+	if (p->mm && atomic_read(&p->mm->mm_users) == 1) {
+		p->mm->numa_next_scan = jiffies;
+		p->mm->numa_next_reset = jiffies;
+		p->mm->numa_scan_seq = 0;
+	}
+
+	p->node_stamp = 0ULL;
+	p->numa_scan_seq = p->mm ? p->mm->numa_scan_seq : 0;
+	p->numa_migrate_seq = p->mm ? p->mm->numa_scan_seq - 1 : 0;
+	p->numa_scan_period = sysctl_numa_balancing_scan_delay;
+	p->numa_work.next = &p->numa_work;
+#endif /* CONFIG_NUMA_BALANCING */
+}
+
+#ifdef CONFIG_NUMA_BALANCING
+#ifdef CONFIG_SCHED_DEBUG
+void set_numabalancing_state(bool enabled)
+{
+	if (enabled)
+		sched_feat_set("NUMA");
+	else
+		sched_feat_set("NO_NUMA");
 }
+#else
+__read_mostly bool numabalancing_enabled;
+
+void set_numabalancing_state(bool enabled)
+{
+	numabalancing_enabled = enabled;
+}
+#endif /* CONFIG_SCHED_DEBUG */
+#endif /* CONFIG_NUMA_BALANCING */
 
 /*
  * fork()/clone()-time setup:
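This hunk seeds NUMA-balancing state at fork (only the first thread in an mm resets the per-mm scan clock) and adds set_numabalancing_state() as the single switch for the feature: with CONFIG_SCHED_DEBUG it is backed by the NUMA scheduler feature via sched_feat_set(), otherwise by the plain numabalancing_enabled boolean. A sketch of a caller gating on the non-debug flag (function name illustrative):

	/* Sketch: skip NUMA placement work while balancing is disabled.
	 * Assumes the !CONFIG_SCHED_DEBUG variant above, where the toggle
	 * is the exported numabalancing_enabled boolean. */
	static void demo_task_tick_numa(struct task_struct *curr)
	{
		if (!numabalancing_enabled)
			return;
		/* ... queue curr->numa_work to scan the address space ... */
	}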
@@ -1886,8 +1954,8 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
 #endif
 
+	context_tracking_task_switch(prev, next);
 	/* Here we just switch the register state and the stack. */
-	rcu_switch(prev, next);
 	switch_to(prev, next, prev);
 
 	barrier();
@@ -2911,7 +2979,7 @@ asmlinkage void __sched schedule(void)
 }
 EXPORT_SYMBOL(schedule);
 
-#ifdef CONFIG_RCU_USER_QS
+#ifdef CONFIG_CONTEXT_TRACKING
 asmlinkage void __sched schedule_user(void)
 {
 	/*
@@ -2920,9 +2988,9 @@ asmlinkage void __sched schedule_user(void)
 	 * we haven't yet exited the RCU idle mode. Do it here manually until
 	 * we find a better solution.
 	 */
-	rcu_user_exit();
+	user_exit();
 	schedule();
-	rcu_user_enter();
+	user_enter();
 }
 #endif
 
@@ -3027,7 +3095,7 @@ asmlinkage void __sched preempt_schedule_irq(void)
 	/* Catch callers which need to be fixed */
 	BUG_ON(ti->preempt_count || !irqs_disabled());
 
-	rcu_user_exit();
+	user_exit();
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
 		local_irq_enable();
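The last three hunks move the scheduler from the RCU-specific rcu_user_exit()/rcu_user_enter() pair to the generalized context-tracking API: the guard widens from CONFIG_RCU_USER_QS to CONFIG_CONTEXT_TRACKING, and user_exit()/user_enter() now mark the kernel/userspace boundary (matching the new <linux/context_tracking.h> include at the top of the file). The bracketing pattern, sketched with an illustrative entry point:

	#include <linux/context_tracking.h>

	/* Sketch: a path that can run while the CPU is still tracked as
	 * "in userspace" brackets its kernel work like this. */
	asmlinkage void demo_entry(void)
	{
		user_exit();	/* back in the kernel: RCU is usable again */
		/* ... kernel work, which may schedule ... */
		user_enter();	/* heading back out to userspace */
	}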
@@ -4029,8 +4097,14 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 		goto out_free_cpus_allowed;
 	}
 	retval = -EPERM;
-	if (!check_same_owner(p) && !ns_capable(task_user_ns(p), CAP_SYS_NICE))
-		goto out_unlock;
+	if (!check_same_owner(p)) {
+		rcu_read_lock();
+		if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) {
+			rcu_read_unlock();
+			goto out_unlock;
+		}
+		rcu_read_unlock();
+	}
 
 	retval = security_task_setscheduler(p);
 	if (retval)
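This hunk fixes the permission check in sched_setaffinity(): the old code read task_user_ns(p) with no protection, while the new code dereferences __task_cred(p) under rcu_read_lock(), as task credentials are RCU-managed. The same pattern as a reusable sketch (helper name illustrative):

	/* Sketch: RCU-safe "may the caller modify this task?" check. */
	static bool demo_may_modify(struct task_struct *p)
	{
		bool ok;

		rcu_read_lock();
		ok = ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE);
		rcu_read_unlock();
		return ok;
	}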
@@ -4474,6 +4548,7 @@ static const char stat_nam[] = TASK_STATE_TO_CHAR_STR;
 void sched_show_task(struct task_struct *p)
 {
 	unsigned long free = 0;
+	int ppid;
 	unsigned state;
 
 	state = p->state ? __ffs(p->state) + 1 : 0;
@@ -4493,8 +4568,11 @@ void sched_show_task(struct task_struct *p)
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	free = stack_not_used(p);
 #endif
+	rcu_read_lock();
+	ppid = task_pid_nr(rcu_dereference(p->real_parent));
+	rcu_read_unlock();
 	printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
-		task_pid_nr(p), task_pid_nr(rcu_dereference(p->real_parent)),
+		task_pid_nr(p), ppid,
 		(unsigned long)task_thread_info(p)->flags);
 
 	show_stack(p, NULL);
@@ -7468,7 +7546,7 @@ static inline struct task_group *cgroup_tg(struct cgroup *cgrp)
 			    struct task_group, css);
 }
 
-static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp)
+static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
 {
 	struct task_group *tg, *parent;
 
@@ -7485,7 +7563,7 @@ static struct cgroup_subsys_state *cpu_cgroup_create(struct cgroup *cgrp)
 	return &tg->css;
 }
 
-static void cpu_cgroup_destroy(struct cgroup *cgrp)
+static void cpu_cgroup_css_free(struct cgroup *cgrp)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
 
@@ -7845,8 +7923,8 @@ static struct cftype cpu_files[] = {
 
 struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
-	.create		= cpu_cgroup_create,
-	.destroy	= cpu_cgroup_destroy,
+	.css_alloc	= cpu_cgroup_css_alloc,
+	.css_free	= cpu_cgroup_css_free,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
 	.exit		= cpu_cgroup_exit,
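The renames above follow the cgroup core's overhaul of subsystem callbacks: .create/.destroy become .css_alloc/.css_free, making explicit that these hooks only allocate and release the cgroup_subsys_state. The converted shape, sketched from the cpu and cpuacct examples in this diff (all demo_* names, including the subsys id, are illustrative):

	struct demo_state {
		struct cgroup_subsys_state css;	/* must be embedded */
		/* subsystem-private fields */
	};

	static struct cgroup_subsys_state *demo_css_alloc(struct cgroup *cgrp)
	{
		struct demo_state *ds = kzalloc(sizeof(*ds), GFP_KERNEL);

		if (!ds)
			return ERR_PTR(-ENOMEM);
		return &ds->css;
	}

	static void demo_css_free(struct cgroup *cgrp)
	{
		struct cgroup_subsys_state *css;

		/* container_of() back from the css, as cgroup_tg() does */
		css = cgroup_subsys_state(cgrp, demo_subsys_id);
		kfree(container_of(css, struct demo_state, css));
	}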
@@ -7869,7 +7947,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 struct cpuacct root_cpuacct;
 
 /* create a new cpu accounting group */
-static struct cgroup_subsys_state *cpuacct_create(struct cgroup *cgrp)
+static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp)
 {
 	struct cpuacct *ca;
 
@@ -7899,7 +7977,7 @@ out:
 }
 
 /* destroy an existing cpu accounting group */
-static void cpuacct_destroy(struct cgroup *cgrp)
+static void cpuacct_css_free(struct cgroup *cgrp)
 {
 	struct cpuacct *ca = cgroup_ca(cgrp);
 
@@ -8070,9 +8148,15 @@ void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 
 struct cgroup_subsys cpuacct_subsys = {
 	.name		= "cpuacct",
-	.create		= cpuacct_create,
-	.destroy	= cpuacct_destroy,
+	.css_alloc	= cpuacct_css_alloc,
+	.css_free	= cpuacct_css_free,
 	.subsys_id	= cpuacct_subsys_id,
 	.base_cftypes	= files,
 };
 #endif	/* CONFIG_CGROUP_CPUACCT */
+
+void dump_cpu_task(int cpu)
+{
+	pr_info("Task dump for CPU %d:\n", cpu);
+	sched_show_task(cpu_curr(cpu));
+}
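Finally, dump_cpu_task() gives other subsystems a one-call way to print the task currently running on a CPU, building on sched_show_task() (which, per the earlier hunk, now also reports the parent PID fetched under RCU). A usage sketch with an illustrative caller:

	/* Sketch: dump the current task on every online CPU, e.g. from a
	 * stall or watchdog debugging path. */
	static void demo_dump_all_cpus(void)
	{
		int cpu;

		for_each_online_cpu(cpu)
			dump_cpu_task(cpu);
	}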