-rw-r--r--  Documentation/controllers/cpuacct.txt    32
-rw-r--r--  Documentation/scheduler/sched-arch.txt    4
-rw-r--r--  arch/ia64/Kconfig                          2
-rw-r--r--  arch/m32r/Kconfig                          2
-rw-r--r--  arch/mips/Kconfig                          2
-rw-r--r--  arch/powerpc/Kconfig                       2
-rw-r--r--  arch/x86/Kconfig                           4
-rw-r--r--  include/asm-m32r/system.h                  2
-rw-r--r--  include/linux/sched.h                      3
-rw-r--r--  kernel/Makefile                            3
-rw-r--r--  kernel/sched.c                           267
-rw-r--r--  kernel/sched_debug.c                      57
-rw-r--r--  kernel/sched_rt.c                          7
-rw-r--r--  kernel/user.c                              2
14 files changed, 205 insertions, 184 deletions
diff --git a/Documentation/controllers/cpuacct.txt b/Documentation/controllers/cpuacct.txt
new file mode 100644
index 000000000000..bb775fbe43d7
--- /dev/null
+++ b/Documentation/controllers/cpuacct.txt
@@ -0,0 +1,32 @@
+CPU Accounting Controller
+-------------------------
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these groups of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of its child groups and the tasks
+directly present in its group.
+
+Accounting groups can be created by first mounting the cgroup filesystem.
+
+# mkdir /cgroups
+# mount -t cgroup -ocpuacct none /cgroups
+
+With the above step, the initial or the parent accounting group
+becomes visible at /cgroups. At bootup, this group includes all the
+tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
+/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
+this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /cgroups.
+
+# cd /cgroups
+# mkdir g1
+# echo $$ > g1/tasks
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and its children
+can be obtained from g1/cpuacct.usage and the same is accumulated in
+/cgroups/cpuacct.usage also.
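
As a quick illustration of the interface the new document describes, a user-space reader of the per-group counter might look like the sketch below. It is not part of this patch; the /cgroups/g1 path is simply the example hierarchy created in the text above.

/* Sketch: read a group's accumulated CPU time from cpuacct.usage. */
#include <stdio.h>

int main(void)
{
	unsigned long long ns;
	FILE *f = fopen("/cgroups/g1/cpuacct.usage", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%llu", &ns) != 1) {
		fclose(f);
		fprintf(stderr, "unexpected cpuacct.usage format\n");
		return 1;
	}
	fclose(f);
	/* the kernel reports the group's total CPU time in nanoseconds */
	printf("g1 used %.3f s of CPU time\n", ns / 1e9);
	return 0;
}
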
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt
index 941615a9769b..d43dbcbd163b 100644
--- a/Documentation/scheduler/sched-arch.txt
+++ b/Documentation/scheduler/sched-arch.txt
@@ -8,7 +8,7 @@ Context switch
 By default, the switch_to arch function is called with the runqueue
 locked. This is usually not a problem unless switch_to may need to
 take the runqueue lock. This is usually due to a wake up operation in
-the context switch. See include/asm-ia64/system.h for an example.
+the context switch. See arch/ia64/include/asm/system.h for an example.
 
 To request the scheduler call switch_to with the runqueue unlocked,
 you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
@@ -23,7 +23,7 @@ disabled. Interrupts may be enabled over the call if it is likely to
 introduce a significant interrupt latency by adding the line
 `#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
 unlocked context switches. This define also implies
-`__ARCH_WANT_UNLOCKED_CTXSW`. See include/asm-arm/system.h for an
+`__ARCH_WANT_UNLOCKED_CTXSW`. See arch/arm/include/asm/system.h for an
 example.
 
 
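
For reference, the two opt-ins described in the text above amount to an architecture adding one or both defines to its system.h. The fragment below uses a made-up arch ("foo") purely for illustration and is not part of this patch.

/* Hypothetical arch/foo/include/asm/system.h fragment (illustration only) */
#ifndef _ASM_FOO_SYSTEM_H
#define _ASM_FOO_SYSTEM_H

/* Ask the scheduler to call switch_to() with the runqueue unlocked. */
#define __ARCH_WANT_UNLOCKED_CTXSW

/* Additionally allow interrupts to stay enabled across the context
 * switch; per the documentation this implies __ARCH_WANT_UNLOCKED_CTXSW. */
#define __ARCH_WANT_INTERRUPTS_ON_CTXSW

#endif /* _ASM_FOO_SYSTEM_H */
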
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6bd91ed7cd03..7fa8f615ba6e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -99,7 +99,7 @@ config GENERIC_IOMAP
 	bool
 	default y
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index dbaed4a63815..29047d5c259a 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -273,7 +273,7 @@ config GENERIC_CALIBRATE_DELAY
 	bool
 	default y
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f4af967a6b30..a5255e7c79e0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE
 	bool
 	default y
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 525c13a4de93..adb23ea1c1ef 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -141,7 +141,7 @@ config GENERIC_NVRAM
 	bool
 	default y if PPC32
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	bool
 	default y
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ac22bb7719f7..7b7d2764a215 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -367,10 +367,10 @@ config X86_RDC321X
 	  as R-8610-(G).
 	  If you don't have one of these chips, you should say N here.
 
-config SCHED_NO_NO_OMIT_FRAME_POINTER
+config SCHED_OMIT_FRAME_POINTER
 	def_bool y
 	prompt "Single-depth WCHAN output"
-	depends on X86_32
+	depends on X86
 	help
 	  Calculate simpler /proc/<PID>/wchan values. If this option
 	  is disabled then wchan values will recurse back to the
diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h
index 70a57c8c002b..c980f5ba8de7 100644
--- a/include/asm-m32r/system.h
+++ b/include/asm-m32r/system.h
@@ -23,7 +23,7 @@
  */
 
 #if defined(CONFIG_FRAME_POINTER) || \
-	!defined(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER)
+	!defined(CONFIG_SCHED_OMIT_FRAME_POINTER)
 #define M32R_PUSH_FP "	push fp\n"
 #define M32R_POP_FP  "	pop fp\n"
 #else
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 644ffbda17ca..d8733f07d80b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -259,8 +259,6 @@ static inline int select_nohz_load_balancer(int cpu)
 }
 #endif
 
-extern unsigned long rt_needs_cpu(int cpu);
-
 /*
  * Only dump TASK_* tasks. (0 for all tasks)
  */
@@ -2220,6 +2218,7 @@ extern void normalize_rt_tasks(void);
 extern struct task_group init_task_group;
 #ifdef CONFIG_USER_SCHED
 extern struct task_group root_task_group;
+extern void set_tg_uid(struct user_struct *user);
 #endif
 
 extern struct task_group *sched_create_group(struct task_group *parent);
diff --git a/kernel/Makefile b/kernel/Makefile
index 19fad003b19d..6a212b842d86 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -19,7 +19,6 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
 CFLAGS_REMOVE_rtmutex-debug.o = -pg
 CFLAGS_REMOVE_cgroup-debug.o = -pg
 CFLAGS_REMOVE_sched_clock.o = -pg
-CFLAGS_REMOVE_sched.o = -pg
 endif
 
 obj-$(CONFIG_FREEZER) += freezer.o
@@ -90,7 +89,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 
-ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
+ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
 # needed for x86 only. Why this used to be enabled for all architectures is beyond
 # me. I suspect most platforms don't need this, but until we know that for sure
diff --git a/kernel/sched.c b/kernel/sched.c
index b7480fb5c3dc..152828239ef0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -261,6 +261,10 @@ struct task_group {
 	struct cgroup_subsys_state css;
 #endif
 
+#ifdef CONFIG_USER_SCHED
+	uid_t uid;
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
@@ -286,6 +290,12 @@ struct task_group {
 
 #ifdef CONFIG_USER_SCHED
 
+/* Helper function to pass uid information to create_sched_user() */
+void set_tg_uid(struct user_struct *user)
+{
+	user->tg->uid = user->uid;
+}
+
 /*
  * Root task group.
  * Every UID task group (including init_task_group aka UID-0) will
@@ -703,45 +713,18 @@ static __read_mostly char *sched_feat_names[] = {
 
 #undef SCHED_FEAT
 
-static int sched_feat_open(struct inode *inode, struct file *filp)
+static int sched_feat_show(struct seq_file *m, void *v)
 {
-	filp->private_data = inode->i_private;
-	return 0;
-}
-
-static ssize_t
-sched_feat_read(struct file *filp, char __user *ubuf,
-		size_t cnt, loff_t *ppos)
-{
-	char *buf;
-	int r = 0;
-	int len = 0;
 	int i;
 
 	for (i = 0; sched_feat_names[i]; i++) {
-		len += strlen(sched_feat_names[i]);
-		len += 4;
-	}
-
-	buf = kmalloc(len + 2, GFP_KERNEL);
-	if (!buf)
-		return -ENOMEM;
-
-	for (i = 0; sched_feat_names[i]; i++) {
-		if (sysctl_sched_features & (1UL << i))
-			r += sprintf(buf + r, "%s ", sched_feat_names[i]);
-		else
-			r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
+		if (!(sysctl_sched_features & (1UL << i)))
+			seq_puts(m, "NO_");
+		seq_printf(m, "%s ", sched_feat_names[i]);
 	}
+	seq_puts(m, "\n");
 
-	r += sprintf(buf + r, "\n");
-	WARN_ON(r >= len + 2);
-
-	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
-
-	kfree(buf);
-
-	return r;
+	return 0;
 }
 
 static ssize_t
@@ -786,10 +769,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
 	return cnt;
 }
 
+static int sched_feat_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, sched_feat_show, NULL);
+}
+
 static struct file_operations sched_feat_fops = {
 	.open		= sched_feat_open,
-	.read		= sched_feat_read,
-	.write		= sched_feat_write,
+	.write		= sched_feat_write,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
 };
 
 static __init int sched_init_debug(void)
@@ -1474,27 +1464,13 @@ static void
 update_group_shares_cpu(struct task_group *tg, int cpu,
 			unsigned long sd_shares, unsigned long sd_rq_weight)
 {
-	int boost = 0;
 	unsigned long shares;
 	unsigned long rq_weight;
 
 	if (!tg->se[cpu])
 		return;
 
-	rq_weight = tg->cfs_rq[cpu]->load.weight;
-
-	/*
-	 * If there are currently no tasks on the cpu pretend there is one of
-	 * average load so that when a new task gets to run here it will not
-	 * get delayed by group starvation.
-	 */
-	if (!rq_weight) {
-		boost = 1;
-		rq_weight = NICE_0_LOAD;
-	}
-
-	if (unlikely(rq_weight > sd_rq_weight))
-		rq_weight = sd_rq_weight;
+	rq_weight = tg->cfs_rq[cpu]->rq_weight;
 
 	/*
 	 *		\Sum shares * rq_weight
@@ -1502,7 +1478,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
 	 *		\Sum rq_weight
 	 *
 	 */
-	shares = (sd_shares * rq_weight) / (sd_rq_weight + 1);
+	shares = (sd_shares * rq_weight) / sd_rq_weight;
 	shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
 
 	if (abs(shares - tg->se[cpu]->load.weight) >
@@ -1511,11 +1487,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
 		unsigned long flags;
 
 		spin_lock_irqsave(&rq->lock, flags);
-		/*
-		 * record the actual number of shares, not the boosted amount.
-		 */
-		tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
-		tg->cfs_rq[cpu]->rq_weight = rq_weight;
+		tg->cfs_rq[cpu]->shares = shares;
 
 		__set_se_shares(tg->se[cpu], shares);
 		spin_unlock_irqrestore(&rq->lock, flags);
@@ -1529,13 +1501,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
  */
 static int tg_shares_up(struct task_group *tg, void *data)
 {
-	unsigned long rq_weight = 0;
+	unsigned long weight, rq_weight = 0;
 	unsigned long shares = 0;
 	struct sched_domain *sd = data;
 	int i;
 
 	for_each_cpu_mask(i, sd->span) {
-		rq_weight += tg->cfs_rq[i]->load.weight;
+		/*
+		 * If there are currently no tasks on the cpu pretend there
+		 * is one of average load so that when a new task gets to
+		 * run here it will not get delayed by group starvation.
+		 */
+		weight = tg->cfs_rq[i]->load.weight;
+		if (!weight)
+			weight = NICE_0_LOAD;
+
+		tg->cfs_rq[i]->rq_weight = weight;
+		rq_weight += weight;
 		shares += tg->cfs_rq[i]->shares;
 	}
 
@@ -1545,9 +1527,6 @@ static int tg_shares_up(struct task_group *tg, void *data)
 	if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
 		shares = tg->shares;
 
-	if (!rq_weight)
-		rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
-
 	for_each_cpu_mask(i, sd->span)
 		update_group_shares_cpu(tg, i, shares, rq_weight);
 
@@ -1612,6 +1591,39 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #endif
 
+/*
+ * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
+ */
+static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(this_rq->lock)
+	__acquires(busiest->lock)
+	__acquires(this_rq->lock)
+{
+	int ret = 0;
+
+	if (unlikely(!irqs_disabled())) {
+		/* printk() doesn't work good under rq->lock */
+		spin_unlock(&this_rq->lock);
+		BUG_ON(1);
+	}
+	if (unlikely(!spin_trylock(&busiest->lock))) {
+		if (busiest < this_rq) {
+			spin_unlock(&this_rq->lock);
+			spin_lock(&busiest->lock);
+			spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
+			ret = 1;
+		} else
+			spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
+	}
+	return ret;
+}
+
+static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
+	__releases(busiest->lock)
+{
+	spin_unlock(&busiest->lock);
+	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
+}
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -2812,40 +2824,6 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 }
 
 /*
- * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
- */
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(this_rq->lock)
-	__acquires(busiest->lock)
-	__acquires(this_rq->lock)
-{
-	int ret = 0;
-
-	if (unlikely(!irqs_disabled())) {
-		/* printk() doesn't work good under rq->lock */
-		spin_unlock(&this_rq->lock);
-		BUG_ON(1);
-	}
-	if (unlikely(!spin_trylock(&busiest->lock))) {
-		if (busiest < this_rq) {
-			spin_unlock(&this_rq->lock);
-			spin_lock(&busiest->lock);
-			spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING);
-			ret = 1;
-		} else
-			spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING);
-	}
-	return ret;
-}
-
-static void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
-	__releases(busiest->lock)
-{
-	spin_unlock(&busiest->lock);
-	lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
-}
-
-/*
  * If dest_cpu is allowed for this process, migrate the task to it.
  * This is accomplished by forcing the cpu_allowed mask to only
  * allow dest_cpu, which will force the cpu onto dest_cpu. Then
@@ -6126,7 +6104,6 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
- * NOTE: interrupts should be disabled by the caller
  */
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
@@ -6636,28 +6613,6 @@ early_initcall(migration_init);
 
 #ifdef CONFIG_SCHED_DEBUG
 
-static inline const char *sd_level_to_string(enum sched_domain_level lvl)
-{
-	switch (lvl) {
-	case SD_LV_NONE:
-		return "NONE";
-	case SD_LV_SIBLING:
-		return "SIBLING";
-	case SD_LV_MC:
-		return "MC";
-	case SD_LV_CPU:
-		return "CPU";
-	case SD_LV_NODE:
-		return "NODE";
-	case SD_LV_ALLNODES:
-		return "ALLNODES";
-	case SD_LV_MAX:
-		return "MAX";
-
-	}
-	return "MAX";
-}
-
 static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 				  cpumask_t *groupmask)
 {
@@ -6677,8 +6632,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 		return -1;
 	}
 
-	printk(KERN_CONT "span %s level %s\n",
-		str, sd_level_to_string(sd->level));
+	printk(KERN_CONT "span %s level %s\n", str, sd->name);
 
 	if (!cpu_isset(cpu, sd->span)) {
 		printk(KERN_ERR "ERROR: domain->span does not contain "
@@ -7334,13 +7288,21 @@ struct allmasks {
 };
 
 #if NR_CPUS > 128
-#define	SCHED_CPUMASK_ALLOC		1
-#define	SCHED_CPUMASK_FREE(v)		kfree(v)
-#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
+#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks *v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{
+	*masks = kmalloc(sizeof(**masks), GFP_KERNEL);
+}
+static inline void sched_cpumask_free(struct allmasks *masks)
+{
+	kfree(masks);
+}
 #else
-#define	SCHED_CPUMASK_ALLOC		0
-#define	SCHED_CPUMASK_FREE(v)
-#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
+#define	SCHED_CPUMASK_DECLARE(v)	struct allmasks _v, *v = &_v
+static inline void sched_cpumask_alloc(struct allmasks **masks)
+{ }
+static inline void sched_cpumask_free(struct allmasks *masks)
+{ }
 #endif
 
 #define	SCHED_CPUMASK_VAR(v, a)	cpumask_t *v = (cpumask_t *) \
@@ -7416,9 +7378,8 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		return -ENOMEM;
 	}
 
-#if SCHED_CPUMASK_ALLOC
 	/* get space for all scratch cpumask variables */
-	allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
+	sched_cpumask_alloc(&allmasks);
 	if (!allmasks) {
 		printk(KERN_WARNING "Cannot alloc cpumask array\n");
 		kfree(rd);
@@ -7427,7 +7388,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
 		return -ENOMEM;
 	}
-#endif
+
 	tmpmask = (cpumask_t *)allmasks;
 
 
@@ -7681,13 +7642,13 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 		cpu_attach_domain(sd, rd, i);
 	}
 
-	SCHED_CPUMASK_FREE((void *)allmasks);
+	sched_cpumask_free(allmasks);
 	return 0;
 
 #ifdef CONFIG_NUMA
 error:
 	free_sched_groups(cpu_map, tmpmask);
-	SCHED_CPUMASK_FREE((void *)allmasks);
+	sched_cpumask_free(allmasks);
 	kfree(rd);
 	return -ENOMEM;
 #endif
@@ -7751,8 +7712,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	cpumask_t tmpmask;
 	int i;
 
-	unregister_sched_domain_sysctl();
-
 	for_each_cpu_mask_nr(i, *cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
@@ -7830,7 +7789,7 @@ match1:
 		ndoms_cur = 0;
 		doms_new = &fallback_doms;
 		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-		dattr_new = NULL;
+		WARN_ON_ONCE(dattr_new);
 	}
 
 	/* Build new domains */
@@ -8490,7 +8449,7 @@ static
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se, *parent_se;
+	struct sched_entity *se;
 	struct rq *rq;
 	int i;
 
@@ -8506,18 +8465,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
-		cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
-				      GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
+				      GFP_KERNEL, cpu_to_node(i));
 		if (!cfs_rq)
 			goto err;
 
-		se = kmalloc_node(sizeof(struct sched_entity),
-				  GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		se = kzalloc_node(sizeof(struct sched_entity),
+				  GFP_KERNEL, cpu_to_node(i));
 		if (!se)
 			goto err;
 
-		parent_se = parent ? parent->se[i] : NULL;
-		init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
+		init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
 	}
 
 	return 1;
@@ -8578,7 +8536,7 @@ static
 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 {
 	struct rt_rq *rt_rq;
-	struct sched_rt_entity *rt_se, *parent_se;
+	struct sched_rt_entity *rt_se;
 	struct rq *rq;
 	int i;
 
@@ -8595,18 +8553,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
 	for_each_possible_cpu(i) {
 		rq = cpu_rq(i);
 
-		rt_rq = kmalloc_node(sizeof(struct rt_rq),
-				     GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		rt_rq = kzalloc_node(sizeof(struct rt_rq),
+				     GFP_KERNEL, cpu_to_node(i));
 		if (!rt_rq)
 			goto err;
 
-		rt_se = kmalloc_node(sizeof(struct sched_rt_entity),
-				     GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
+		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
+				     GFP_KERNEL, cpu_to_node(i));
 		if (!rt_se)
 			goto err;
 
-		parent_se = parent ? parent->rt_se[i] : NULL;
-		init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
+		init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
 	}
 
 	return 1;
@@ -9249,11 +9206,12 @@ struct cgroup_subsys cpu_cgroup_subsys = {
  * (balbir@in.ibm.com).
  */
 
-/* track cpu usage of a group of tasks */
+/* track cpu usage of a group of tasks and its child groups */
 struct cpuacct {
 	struct cgroup_subsys_state css;
 	/* cpuusage holds pointer to a u64-type object on every cpu */
 	u64 *cpuusage;
+	struct cpuacct *parent;
 };
 
 struct cgroup_subsys cpuacct_subsys;
@@ -9287,6 +9245,9 @@ static struct cgroup_subsys_state *cpuacct_create(
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (cgrp->parent)
+		ca->parent = cgroup_ca(cgrp->parent);
+
 	return &ca->css;
 }
 
@@ -9366,14 +9327,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
 static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 {
 	struct cpuacct *ca;
+	int cpu;
 
 	if (!cpuacct_subsys.active)
 		return;
 
+	cpu = task_cpu(tsk);
 	ca = task_ca(tsk);
-	if (ca) {
-		u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
 
+	for (; ca; ca = ca->parent) {
+		u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
 	}
 }
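
The update_group_shares_cpu() hunks above keep the comment shares = (\Sum shares * rq_weight) / \Sum rq_weight, now dividing by sd_rq_weight directly instead of sd_rq_weight + 1. As a quick sanity check, the sketch below runs that formula on made-up numbers; it is only an illustration, not code from the patch.

#include <stdio.h>

int main(void)
{
	unsigned long sd_shares = 1024;              /* tg->shares            */
	unsigned long rq_weight[2] = { 2048, 1024 }; /* per-cpu cfs_rq weight */
	unsigned long sd_rq_weight = rq_weight[0] + rq_weight[1];
	int i;

	for (i = 0; i < 2; i++)
		printf("cpu%d: shares = %lu\n", i,
		       sd_shares * rq_weight[i] / sd_rq_weight);
	/* prints 682 and 341: each cpu receives a slice of the group's
	 * shares proportional to the group's runqueue weight there */
	return 0;
}
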
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 26ed8e3d1c15..4293cfa9681d 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -53,6 +53,40 @@ static unsigned long nsec_low(unsigned long long nsec)
 
 #define SPLIT_NS(x) nsec_high(x), nsec_low(x)
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void print_cfs_group_stats(struct seq_file *m, int cpu,
+		struct task_group *tg)
+{
+	struct sched_entity *se = tg->se[cpu];
+	if (!se)
+		return;
+
+#define P(F) \
+	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
+#define PN(F) \
+	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
+
+	PN(se->exec_start);
+	PN(se->vruntime);
+	PN(se->sum_exec_runtime);
+#ifdef CONFIG_SCHEDSTATS
+	PN(se->wait_start);
+	PN(se->sleep_start);
+	PN(se->block_start);
+	PN(se->sleep_max);
+	PN(se->block_max);
+	PN(se->exec_max);
+	PN(se->slice_max);
+	PN(se->wait_max);
+	PN(se->wait_sum);
+	P(se->wait_count);
+#endif
+	P(se->load.weight);
+#undef PN
+#undef P
+}
+#endif
+
 static void
 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 {
@@ -121,20 +155,19 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 
 #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
 	char path[128] = "";
-	struct cgroup *cgroup = NULL;
 	struct task_group *tg = cfs_rq->tg;
 
-	if (tg)
-		cgroup = tg->css.cgroup;
-
-	if (cgroup)
-		cgroup_path(cgroup, path, sizeof(path));
+	cgroup_path(tg->css.cgroup, path, sizeof(path));
 
 	SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
+#elif defined(CONFIG_USER_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
+	{
+		uid_t uid = cfs_rq->tg->uid;
+		SEQ_printf(m, "\ncfs_rq[%d] for UID: %u\n", cpu, uid);
+	}
 #else
 	SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
 #endif
-
 	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
 			SPLIT_NS(cfs_rq->exec_clock));
 
@@ -168,6 +201,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 #ifdef CONFIG_SMP
 	SEQ_printf(m, "  .%-30s: %lu\n", "shares", cfs_rq->shares);
 #endif
+	print_cfs_group_stats(m, cpu, cfs_rq->tg);
 #endif
 }
 
@@ -175,14 +209,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
 {
 #if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
 	char path[128] = "";
-	struct cgroup *cgroup = NULL;
 	struct task_group *tg = rt_rq->tg;
 
-	if (tg)
-		cgroup = tg->css.cgroup;
-
-	if (cgroup)
-		cgroup_path(cgroup, path, sizeof(path));
+	cgroup_path(tg->css.cgroup, path, sizeof(path));
 
 	SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
 #else
@@ -272,7 +301,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
 	u64 now = ktime_to_ns(ktime_get());
 	int cpu;
 
-	SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n",
+	SEQ_printf(m, "Sched Debug Version: v0.08, %s %.*s\n",
 		init_utsname()->release,
 		(int)strcspn(init_utsname()->version, " "),
 		init_utsname()->version);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d9ba9d5f99d6..587a16e2a8f5 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -537,13 +537,13 @@ static void update_curr_rt(struct rq *rq)
 	for_each_sched_rt_entity(rt_se) {
 		rt_rq = rt_rq_of_se(rt_se);
 
-		spin_lock(&rt_rq->rt_runtime_lock);
 		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
+			spin_lock(&rt_rq->rt_runtime_lock);
 			rt_rq->rt_time += delta_exec;
 			if (sched_rt_runtime_exceeded(rt_rq))
 				resched_task(curr);
+			spin_unlock(&rt_rq->rt_runtime_lock);
 		}
-		spin_unlock(&rt_rq->rt_runtime_lock);
 	}
 }
 
@@ -909,9 +909,6 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 /* Only try algorithms three times */
 #define RT_MAX_TRIES 3
 
-static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
-static void double_unlock_balance(struct rq *this_rq, struct rq *busiest);
-
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
diff --git a/kernel/user.c b/kernel/user.c
index 39d6159fae43..cec2224bc9f5 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -101,6 +101,8 @@ static int sched_create_user(struct user_struct *up)
 	if (IS_ERR(up->tg))
 		rc = -ENOMEM;
 
+	set_tg_uid(up);
+
 	return rc;
 }
 