path: root/kernel/sched.c
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  192
1 file changed, 73 insertions(+), 119 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 700aa9a1413f..3d1ee429219b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -703,45 +703,18 @@ static __read_mostly char *sched_feat_names[] = {
 
 #undef SCHED_FEAT
 
-static int sched_feat_open(struct inode *inode, struct file *filp)
-{
-        filp->private_data = inode->i_private;
-        return 0;
-}
-
-static ssize_t
-sched_feat_read(struct file *filp, char __user *ubuf,
-                size_t cnt, loff_t *ppos)
+static int sched_feat_show(struct seq_file *m, void *v)
 {
-        char *buf;
-        int r = 0;
-        int len = 0;
         int i;
 
         for (i = 0; sched_feat_names[i]; i++) {
-                len += strlen(sched_feat_names[i]);
-                len += 4;
-        }
-
-        buf = kmalloc(len + 2, GFP_KERNEL);
-        if (!buf)
-                return -ENOMEM;
-
-        for (i = 0; sched_feat_names[i]; i++) {
-                if (sysctl_sched_features & (1UL << i))
-                        r += sprintf(buf + r, "%s ", sched_feat_names[i]);
-                else
-                        r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
+                if (!(sysctl_sched_features & (1UL << i)))
+                        seq_puts(m, "NO_");
+                seq_printf(m, "%s ", sched_feat_names[i]);
         }
+        seq_puts(m, "\n");
 
-        r += sprintf(buf + r, "\n");
-        WARN_ON(r >= len + 2);
-
-        r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-
-        kfree(buf);
-
-        return r;
+        return 0;
 }
 
 static ssize_t
@@ -786,10 +759,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
         return cnt;
 }
 
+static int sched_feat_open(struct inode *inode, struct file *filp)
+{
+        return single_open(filp, sched_feat_show, NULL);
+}
+
 static struct file_operations sched_feat_fops = {
         .open           = sched_feat_open,
-        .read           = sched_feat_read,
-        .write          = sched_feat_write,
+        .write          = sched_feat_write,
+        .read           = seq_read,
+        .llseek         = seq_lseek,
+        .release        = single_release,
 };
 
 static __init int sched_init_debug(void)
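
The two hunks above replace a hand-rolled read() implementation — size the output by hand, kmalloc() a buffer, sprintf() into it, copy it out with simple_read_from_buffer() — with the seq_file single-record pattern, which manages the output buffer itself. A minimal sketch of that pattern, using hypothetical demo_* names rather than anything from this commit:

#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

/* single_open() arranges for this callback to produce the whole file body */
static int demo_show(struct seq_file *m, void *v)
{
        /* seq_file grows the buffer as needed; no manual length math */
        seq_puts(m, "feature_a NO_feature_b\n");
        return 0;
}

static int demo_open(struct inode *inode, struct file *filp)
{
        return single_open(filp, demo_show, NULL);
}

static const struct file_operations demo_fops = {
        .open           = demo_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static int __init demo_init(void)
{
        /* exposed under debugfs, analogous to what sched_init_debug() does */
        debugfs_create_file("demo_features", 0444, NULL, NULL, &demo_fops);
        return 0;
}
module_init(demo_init);
MODULE_LICENSE("GPL");
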
@@ -1474,27 +1454,13 @@ static void
 update_group_shares_cpu(struct task_group *tg, int cpu,
                         unsigned long sd_shares, unsigned long sd_rq_weight)
 {
-        int boost = 0;
         unsigned long shares;
         unsigned long rq_weight;
 
         if (!tg->se[cpu])
                 return;
 
-        rq_weight = tg->cfs_rq[cpu]->load.weight;
-
-        /*
-         * If there are currently no tasks on the cpu pretend there is one of
-         * average load so that when a new task gets to run here it will not
-         * get delayed by group starvation.
-         */
-        if (!rq_weight) {
-                boost = 1;
-                rq_weight = NICE_0_LOAD;
-        }
-
-        if (unlikely(rq_weight > sd_rq_weight))
-                rq_weight = sd_rq_weight;
+        rq_weight = tg->cfs_rq[cpu]->rq_weight;
 
         /*
          *              \Sum shares * rq_weight
@@ -1502,7 +1468,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
          *              \Sum rq_weight
          *
          */
-        shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+        shares = (sd_shares * rq_weight) / sd_rq_weight;
         shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
         if (abs(shares - tg->se[cpu]->load.weight) >
@@ -1511,11 +1477,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
                 unsigned long flags;
 
                 spin_lock_irqsave(&rq->lock, flags);
-                /*
-                 * record the actual number of shares, not the boosted amount.
-                 */
-                tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-                tg->cfs_rq[cpu]->rq_weight = rq_weight;
+                tg->cfs_rq[cpu]->shares = shares;
 
                 __set_se_shares(tg->se[cpu], shares);
                 spin_unlock_irqrestore(&rq->lock, flags);
@@ -1529,13 +1491,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-        unsigned long rq_weight = 0;
+        unsigned long weight, rq_weight = 0;
         unsigned long shares = 0;
         struct sched_domain *sd = data;
         int i;
 
         for_each_cpu_mask(i, sd->span) {
-                rq_weight += tg->cfs_rq[i]->load.weight;
+                /*
+                 * If there are currently no tasks on the cpu pretend there
+                 * is one of average load so that when a new task gets to
+                 * run here it will not get delayed by group starvation.
+                 */
+                weight = tg->cfs_rq[i]->load.weight;
+                if (!weight)
+                        weight = NICE_0_LOAD;
+
+                tg->cfs_rq[i]->rq_weight = weight;
+                rq_weight += weight;
                 shares += tg->cfs_rq[i]->shares;
         }
 
@@ -1545,9 +1517,6 @@ static int tg_shares_up(struct task_group *tg, void *data)
         if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
                 shares = tg->shares;
 
-        if (!rq_weight)
-                rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-
         for_each_cpu_mask(i, sd->span)
                 update_group_shares_cpu(tg, i, shares, rq_weight);
 
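
Taken together, the four hunks above move the idle-cpu boost out of update_group_shares_cpu() and into tg_shares_up(): the pretend NICE_0_LOAD weight is now folded into the cached per-cpu rq_weight and into the \Sum rq_weight denominator, so the denominator can no longer be zero and the old "+ 1" fudge drops out. A worked example of the resulting arithmetic — illustrative userspace C, not kernel code:

#include <stdio.h>

#define NICE_0_LOAD 1024UL

/* shares_i = sd_shares * rq_weight_i / \Sum rq_weight */
static unsigned long group_share(unsigned long sd_shares,
                                 unsigned long cpu_weight,
                                 unsigned long total_weight)
{
        return sd_shares * cpu_weight / total_weight;
}

int main(void)
{
        /* cpu0 idle: weight 0 is replaced by one average (nice-0) task */
        unsigned long w0 = NICE_0_LOAD;
        /* cpu1 busy: three nice-0 tasks */
        unsigned long w1 = 3 * NICE_0_LOAD;
        unsigned long total = w0 + w1;

        printf("cpu0 %lu cpu1 %lu\n",
               group_share(1024, w0, total),   /* 256 */
               group_share(1024, w1, total));  /* 768 */
        return 0;
}
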
@@ -2838,7 +2807,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
         return ret;
 }
 
-static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
         __releases(busiest->lock)
 {
         spin_unlock(&busiest->lock);
@@ -6126,7 +6095,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
- * NOTE: interrupts should be disabled by the caller
  */
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
@@ -6636,28 +6604,6 @@ early_initcall(migration_init);
 
 #ifdef CONFIG_SCHED_DEBUG
 
-static inline const char *sd_level_to_string(enum sched_domain_level lvl)
-{
-        switch (lvl) {
-        case SD_LV_NONE:
-                return "NONE";
-        case SD_LV_SIBLING:
-                return "SIBLING";
-        case SD_LV_MC:
-                return "MC";
-        case SD_LV_CPU:
-                return "CPU";
-        case SD_LV_NODE:
-                return "NODE";
-        case SD_LV_ALLNODES:
-                return "ALLNODES";
-        case SD_LV_MAX:
-                return "MAX";
-
-        }
-        return "MAX";
-}
-
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                                   cpumask_t *groupmask)
 {
@@ -6677,8 +6623,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                 return -1;
         }
 
-        printk(KERN_CONT "span %s level %s\n",
-                str, sd_level_to_string(sd->level));
+        printk(KERN_CONT "span %s level %s\n", str, sd->name);
 
         if (!cpu_isset(cpu, sd->span)) {
                 printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -7334,13 +7279,21 @@ struct allmasks {
 };
 
 #if NR_CPUS > 128
-#define SCHED_CPUMASK_ALLOC             1
-#define SCHED_CPUMASK_FREE(v)           kfree(v)
-#define SCHED_CPUMASK_DECLARE(v)        struct allmasks *v
+#define SCHED_CPUMASK_DECLARE(v)        struct allmasks *v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{
+        *masks = kmalloc(sizeof(**masks), GFP_KERNEL);
+}
+static inline void sched_cpumask_free(struct allmasks *masks)
+{
+        kfree(masks);
+}
 #else
-#define SCHED_CPUMASK_ALLOC             0
-#define SCHED_CPUMASK_FREE(v)
-#define SCHED_CPUMASK_DECLARE(v)        struct allmasks _v, *v = &_v
+#define SCHED_CPUMASK_DECLARE(v)        struct allmasks _v, *v = &_v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{ }
+static inline void sched_cpumask_free(struct allmasks *masks)
+{ }
 #endif
 
 #define SCHED_CPUMASK_VAR(v, a)         cpumask_t *v = (cpumask_t *) \
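
The hunk above trades the SCHED_CPUMASK_ALLOC/SCHED_CPUMASK_FREE() macros for static inline functions, so both configurations type-check their arguments and the call sites below lose their #if guards; in the on-stack branch the alloc is a no-op and the pointer already refers to the local struct, so the !allmasks error path is dead code the compiler can discard. A stripped-down sketch of the pattern, with made-up names (USE_HEAP_MASKS standing in for NR_CPUS > 128):

#include <stdlib.h>

struct scratch { unsigned long masks[16]; };

#ifdef USE_HEAP_MASKS
#define SCRATCH_DECLARE(v)      struct scratch *v
static inline void scratch_alloc(struct scratch **s)
{
        *s = malloc(sizeof(**s));       /* may fail: caller checks *s */
}
static inline void scratch_free(struct scratch *s)
{
        free(s);
}
#else
#define SCRATCH_DECLARE(v)      struct scratch _v, *v = &_v
static inline void scratch_alloc(struct scratch **s) { }  /* v already set */
static inline void scratch_free(struct scratch *s) { }
#endif
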
@@ -7416,9 +7369,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
                 return -ENOMEM;
         }
 
-#if SCHED_CPUMASK_ALLOC
         /* get space for all scratch cpumask variables */
-        allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
+        sched_cpumask_alloc(&allmasks);
         if (!allmasks) {
                 printk(KERN_WARNING "Cannot alloc cpumask array\n");
                 kfree(rd);
@@ -7427,7 +7379,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
                 return -ENOMEM;
         }
-#endif
+
         tmpmask = (cpumask_t *)allmasks;
 
 
@@ -7681,13 +7633,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
                 cpu_attach_domain(sd, rd, i);
         }
 
-        SCHED_CPUMASK_FREE((void *)allmasks);
+        sched_cpumask_free(allmasks);
         return 0;
 
 #ifdef CONFIG_NUMA
 error:
         free_sched_groups(cpu_map, tmpmask);
-        SCHED_CPUMASK_FREE((void *)allmasks);
+        sched_cpumask_free(allmasks);
         kfree(rd);
         return -ENOMEM;
 #endif
@@ -7751,8 +7703,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
         cpumask_t tmpmask;
         int i;
 
-        unregister_sched_domain_sysctl();
-
         for_each_cpu_mask_nr(i, *cpu_map)
                 cpu_attach_domain(NULL, &def_root_domain, i);
         synchronize_sched();
@@ -7830,7 +7780,7 @@ match1:
                 ndoms_cur = 0;
                 doms_new = &fallback_doms;
                 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-                dattr_new = NULL;
+                WARN_ON_ONCE(dattr_new);
         }
 
         /* Build new domains */
@@ -8490,7 +8440,7 @@ static
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
         struct cfs_rq *cfs_rq;
-        struct sched_entity *se, *parent_se;
+        struct sched_entity *se;
         struct rq *rq;
         int i;
 
@@ -8506,18 +8456,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
         for_each_possible_cpu(i) {
                 rq = cpu_rq(i);
 
-                cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
-                                      GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+                cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+                                      GFP_KERNEL, cpu_to_node(i));
                 if (!cfs_rq)
                         goto err;
 
-                se = kmalloc_node(sizeof(struct sched_entity),
-                                  GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+                se = kzalloc_node(sizeof(struct sched_entity),
+                                  GFP_KERNEL, cpu_to_node(i));
                 if (!se)
                         goto err;
 
-                parent_se = parent ? parent->se[i] : NULL;
-                init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
+                init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
         }
 
         return 1;
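
The allocation switch here is mechanical — kzalloc_node() is kmalloc_node() with __GFP_ZERO implied — while the parent handling tightens: the parent ? ... : NULL fallback goes away, the new code assuming every caller now passes a valid parent group. A one-function sketch of the equivalence (hypothetical helper, kernel-style C):

#include <linux/slab.h>
#include <linux/topology.h>

/* both forms below return zeroed, node-local memory */
static struct cfs_rq *alloc_cfs_rq_on(int cpu)
{
        return kzalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
                            cpu_to_node(cpu));
        /* equivalent to:
         * kmalloc_node(sizeof(struct cfs_rq),
         *              GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu));
         */
}
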
@@ -8578,7 +8527,7 @@ static
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
         struct rt_rq *rt_rq;
-        struct sched_rt_entity *rt_se, *parent_se;
+        struct sched_rt_entity *rt_se;
         struct rq *rq;
         int i;
 
@@ -8595,18 +8544,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
         for_each_possible_cpu(i) {
                 rq = cpu_rq(i);
 
-                rt_rq = kmalloc_node(sizeof(struct rt_rq),
-                                GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+                rt_rq = kzalloc_node(sizeof(struct rt_rq),
+                                GFP_KERNEL, cpu_to_node(i));
                 if (!rt_rq)
                         goto err;
 
-                rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
-                                GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+                rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+                                GFP_KERNEL, cpu_to_node(i));
                 if (!rt_se)
                         goto err;
 
-                parent_se = parent ? parent->rt_se[i] : NULL;
-                init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
+                init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
         }
 
         return 1;
@@ -9249,11 +9197,12 @@ struct cgroup_subsys cpu_cgroup_subsys = {
  * (balbir@in.ibm.com).
  */
 
-/* track cpu usage of a group of tasks */
+/* track cpu usage of a group of tasks and its child groups */
 struct cpuacct {
         struct cgroup_subsys_state css;
         /* cpuusage holds pointer to a u64-type object on every cpu */
         u64 *cpuusage;
+        struct cpuacct *parent;
 };
 
 struct cgroup_subsys cpuacct_subsys;
@@ -9287,6 +9236,9 @@ static struct cgroup_subsys_state *cpuacct_create(
                 return ERR_PTR(-ENOMEM);
         }
 
+        if (cgrp->parent)
+                ca->parent = cgroup_ca(cgrp->parent);
+
         return &ca->css;
 }
 
@@ -9366,14 +9318,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
         struct cpuacct *ca;
+        int cpu;
 
         if (!cpuacct_subsys.active)
                 return;
 
+        cpu = task_cpu(tsk);
         ca = task_ca(tsk);
-        if (ca) {
-                u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+        for (; ca; ca = ca->parent) {
+                u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
                 *cpuusage += cputime;
         }
 }
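
The three cpuacct hunks give each group a parent pointer at creation time and turn the single charge into a walk up the hierarchy, so CPU time consumed in a child group is also visible in every ancestor's usage. A self-contained model of that walk — illustrative userspace C, not the kernel code:

#include <stdio.h>

struct acct {
        unsigned long long usage;
        struct acct *parent;
};

/* charge propagates from the task's group up to the root */
static void charge(struct acct *ca, unsigned long long cputime)
{
        for (; ca; ca = ca->parent)
                ca->usage += cputime;
}

int main(void)
{
        struct acct root  = { 0, NULL };
        struct acct child = { 0, &root };

        charge(&child, 100);
        printf("child=%llu root=%llu\n", child.usage, root.usage);
        /* prints: child=100 root=100 */
        return 0;
}
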