-rw-r--r--  Documentation/controllers/cpuacct.txt | 32
-rw-r--r--  Documentation/lockstat.txt | 51
-rw-r--r--  Documentation/scheduler/sched-arch.txt | 4
-rw-r--r--  arch/ia64/Kconfig | 2
-rw-r--r--  arch/ia64/include/asm/topology.h | 2
-rw-r--r--  arch/m32r/Kconfig | 2
-rw-r--r--  arch/mips/Kconfig | 2
-rw-r--r--  arch/mips/include/asm/mach-ip27/topology.h | 1
-rw-r--r--  arch/powerpc/Kconfig | 2
-rw-r--r--  arch/powerpc/include/asm/topology.h | 1
-rw-r--r--  arch/sh/include/asm/topology.h | 1
-rw-r--r--  arch/um/include/asm/system.h | 14
-rw-r--r--  arch/x86/Kconfig | 4
-rw-r--r--  arch/x86/include/asm/uaccess.h | 2
-rw-r--r--  arch/x86/include/asm/uaccess_32.h | 8
-rw-r--r--  arch/x86/include/asm/uaccess_64.h | 6
-rw-r--r--  arch/x86/lib/usercopy_32.c | 8
-rw-r--r--  arch/x86/lib/usercopy_64.c | 4
-rw-r--r--  include/asm-m32r/system.h | 2
-rw-r--r--  include/linux/debug_locks.h | 2
-rw-r--r--  include/linux/futex.h | 2
-rw-r--r--  include/linux/kernel.h | 11
-rw-r--r--  include/linux/lockdep.h | 31
-rw-r--r--  include/linux/mutex.h | 2
-rw-r--r--  include/linux/rcuclassic.h | 2
-rw-r--r--  include/linux/sched.h | 37
-rw-r--r--  include/linux/uaccess.h | 2
-rw-r--r--  kernel/Makefile | 3
-rw-r--r--  kernel/exit.c | 2
-rw-r--r--  kernel/extable.c | 16
-rw-r--r--  kernel/futex.c | 290
-rw-r--r--  kernel/lockdep.c | 33
-rw-r--r--  kernel/lockdep_proc.c | 28
-rw-r--r--  kernel/mutex.c | 10
-rw-r--r--  kernel/notifier.c | 8
-rw-r--r--  kernel/posix-cpu-timers.c | 10
-rw-r--r--  kernel/rcuclassic.c | 6
-rw-r--r--  kernel/sched.c | 1046
-rw-r--r--  kernel/sched_cpupri.c | 39
-rw-r--r--  kernel/sched_cpupri.h | 5
-rw-r--r--  kernel/sched_debug.c | 51
-rw-r--r--  kernel/sched_fair.c | 14
-rw-r--r--  kernel/sched_rt.c | 80
-rw-r--r--  kernel/sched_stats.h | 3
-rw-r--r--  kernel/softlockup.c | 2
-rw-r--r--  kernel/sys.c | 2
-rw-r--r--  kernel/time/tick-sched.c | 10
-rw-r--r--  lib/Kconfig.debug | 10
-rw-r--r--  mm/memory.c | 15
49 files changed, 1041 insertions, 879 deletions
diff --git a/Documentation/controllers/cpuacct.txt b/Documentation/controllers/cpuacct.txt
new file mode 100644
index 000000000000..bb775fbe43d7
--- /dev/null
+++ b/Documentation/controllers/cpuacct.txt
@@ -0,0 +1,32 @@
+CPU Accounting Controller
+-------------------------
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these groups of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of its child groups and the tasks
+directly present in its group.
+
+Accounting groups can be created by first mounting the cgroup filesystem.
+
+# mkdir /cgroups
+# mount -t cgroup -ocpuacct none /cgroups
+
+With the above step, the initial or the parent accounting group
+becomes visible at /cgroups. At bootup, this group includes all the
+tasks in the system. /cgroups/tasks lists the tasks in this cgroup.
+/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by
+this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /cgroups.
+
+# cd /cgroups
+# mkdir g1
+# echo $$ > g1/tasks
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and its children
+can be obtained from g1/cpuacct.usage and the same is accumulated in
+/cgroups/cpuacct.usage also.
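
The cpuacct.usage file described above is plain text holding a single
nanosecond counter, so it can be read from any language. A minimal userspace
reader is sketched below in C; the /cgroups mount point and the g1 group name
are taken from the example above, everything else (read_cpuacct_usage() and
so on) is made up for illustration.

#include <stdio.h>

/* Read the cumulative CPU time (in nanoseconds) charged to one
 * accounting group.  Returns 0 on success, -1 on error. */
static int read_cpuacct_usage(const char *group, unsigned long long *ns)
{
	char path[256];
	FILE *f;
	int ok;

	/* /cgroups is where the example above mounted the hierarchy */
	snprintf(path, sizeof(path), "/cgroups/%s/cpuacct.usage", group);
	f = fopen(path, "r");
	if (!f)
		return -1;
	ok = (fscanf(f, "%llu", ns) == 1);
	fclose(f);
	return ok ? 0 : -1;
}

int main(void)
{
	unsigned long long ns;

	if (read_cpuacct_usage("g1", &ns) == 0)
		printf("g1 used %llu ns of CPU time\n", ns);
	return 0;
}
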
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt
index 4ba4664ce5c3..9cb9138f7a79 100644
--- a/Documentation/lockstat.txt
+++ b/Documentation/lockstat.txt
@@ -71,35 +71,50 @@ Look at the current lock statistics:
71 71
72# less /proc/lock_stat 72# less /proc/lock_stat
73 73
7401 lock_stat version 0.2 7401 lock_stat version 0.3
7502 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 7502 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
7603 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total 7603 class name con-bounces contentions waittime-min waittime-max waittime-total acq-bounces acquisitions holdtime-min holdtime-max holdtime-total
7704 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 7704 -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
7805 7805
7906 &inode->i_data.tree_lock-W: 15 21657 0.18 1093295.30 11547131054.85 58 10415 0.16 87.51 6387.60 7906 &mm->mmap_sem-W: 233 538 18446744073708 22924.27 607243.51 1342 45806 1.71 8595.89 1180582.34
8007 &inode->i_data.tree_lock-R: 0 0 0.00 0.00 0.00 23302 231198 0.25 8.45 98023.38 8007 &mm->mmap_sem-R: 205 587 18446744073708 28403.36 731975.00 1940 412426 0.58 187825.45 6307502.88
8108 -------------------------- 8108 ---------------
8209 &inode->i_data.tree_lock 0 [<ffffffff8027c08f>] add_to_page_cache+0x5f/0x190 8209 &mm->mmap_sem 487 [<ffffffff8053491f>] do_page_fault+0x466/0x928
8310 8310 &mm->mmap_sem 179 [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
8411 ............................................................................................................................................................................................... 8411 &mm->mmap_sem 279 [<ffffffff80210a57>] sys_mmap+0x75/0xce
8512 8512 &mm->mmap_sem 76 [<ffffffff802a490b>] sys_munmap+0x32/0x59
8613 dcache_lock: 1037 1161 0.38 45.32 774.51 6611 243371 0.15 306.48 77387.24 8613 ---------------
8714 ----------- 8714 &mm->mmap_sem 270 [<ffffffff80210a57>] sys_mmap+0x75/0xce
8815 dcache_lock 180 [<ffffffff802c0d7e>] sys_getcwd+0x11e/0x230 8815 &mm->mmap_sem 431 [<ffffffff8053491f>] do_page_fault+0x466/0x928
8916 dcache_lock 165 [<ffffffff802c002a>] d_alloc+0x15a/0x210 8916 &mm->mmap_sem 138 [<ffffffff802a490b>] sys_munmap+0x32/0x59
9017 dcache_lock 33 [<ffffffff8035818d>] _atomic_dec_and_lock+0x4d/0x70 9017 &mm->mmap_sem 145 [<ffffffff802a6200>] sys_mprotect+0xcd/0x21d
9118 dcache_lock 1 [<ffffffff802beef8>] shrink_dcache_parent+0x18/0x130 9118
9219 ...............................................................................................................................................................................................
9320
9421 dcache_lock: 621 623 0.52 118.26 1053.02 6745 91930 0.29 316.29 118423.41
9522 -----------
9623 dcache_lock 179 [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
9724 dcache_lock 113 [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
9825 dcache_lock 99 [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
9926 dcache_lock 104 [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
10027 -----------
10128 dcache_lock 192 [<ffffffff80378274>] _atomic_dec_and_lock+0x34/0x54
10229 dcache_lock 98 [<ffffffff802ca0dc>] d_rehash+0x1b/0x44
10330 dcache_lock 72 [<ffffffff802cc17b>] d_alloc+0x19a/0x1eb
10431 dcache_lock 112 [<ffffffff802cbca0>] d_instantiate+0x36/0x8a
92 105
93This excerpt shows the first two lock class statistics. Line 01 shows the 106This excerpt shows the first two lock class statistics. Line 01 shows the
94output version - each time the format changes this will be updated. Line 02-04 107output version - each time the format changes this will be updated. Line 02-04
95show the header with column descriptions. Lines 05-10 and 13-18 show the actual 108show the header with column descriptions. Lines 05-18 and 20-31 show the actual
96statistics. These statistics come in two parts; the actual stats separated by a 109statistics. These statistics come in two parts; the actual stats separated by a
97short separator (line 08, 14) from the contention points. 110short separator (line 08, 13) from the contention points.
98 111
99The first lock (05-10) is a read/write lock, and shows two lines above the 112The first lock (05-18) is a read/write lock, and shows two lines above the
100short separator. The contention points don't match the column descriptors, 113short separator. The contention points don't match the column descriptors,
101they have two: contentions and [<IP>] symbol. 114they have two: contentions and [<IP>] symbol. The second set of contention
115points are the points we're contending with.
102 116
117The integer part of the time values is in us.
103 118
104View the top contending locks: 119View the top contending locks:
105 120
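
The documentation above notes that the integer part of the time values is in
microseconds; the numbers people usually derive from these columns are
averages per contention and per acquisition. The small illustration below is
not part of the patch: struct lock_stat_line and its field names are invented
for the sketch, and the sample values are copied from the dcache_lock entry
in the new-format excerpt.

#include <stdio.h>

/* Invented container for the four /proc/lock_stat columns used below. */
struct lock_stat_line {
	unsigned long contentions;	/* "contentions" column   */
	double waittime_total;		/* "waittime-total" (us)  */
	unsigned long acquisitions;	/* "acquisitions" column  */
	double holdtime_total;		/* "holdtime-total" (us)  */
};

static void print_averages(const char *name, const struct lock_stat_line *s)
{
	if (s->contentions)
		printf("%s: avg wait %.2f us per contention\n",
		       name, s->waittime_total / s->contentions);
	if (s->acquisitions)
		printf("%s: avg hold %.2f us per acquisition\n",
		       name, s->holdtime_total / s->acquisitions);
}

int main(void)
{
	/* values from the dcache_lock line of the new-format excerpt */
	struct lock_stat_line dcache = { 623, 1053.02, 91930, 118423.41 };

	print_averages("dcache_lock", &dcache);
	return 0;
}
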
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt
index 941615a9769b..d43dbcbd163b 100644
--- a/Documentation/scheduler/sched-arch.txt
+++ b/Documentation/scheduler/sched-arch.txt
@@ -8,7 +8,7 @@ Context switch
8By default, the switch_to arch function is called with the runqueue 8By default, the switch_to arch function is called with the runqueue
9locked. This is usually not a problem unless switch_to may need to 9locked. This is usually not a problem unless switch_to may need to
10take the runqueue lock. This is usually due to a wake up operation in 10take the runqueue lock. This is usually due to a wake up operation in
11the context switch. See include/asm-ia64/system.h for an example. 11the context switch. See arch/ia64/include/asm/system.h for an example.
12 12
13To request the scheduler call switch_to with the runqueue unlocked, 13To request the scheduler call switch_to with the runqueue unlocked,
14you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file 14you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
@@ -23,7 +23,7 @@ disabled. Interrupts may be enabled over the call if it is likely to
23introduce a significant interrupt latency by adding the line 23introduce a significant interrupt latency by adding the line
24`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for 24`#define __ARCH_WANT_INTERRUPTS_ON_CTXSW` in the same place as for
25unlocked context switches. This define also implies 25unlocked context switches. This define also implies
26`__ARCH_WANT_UNLOCKED_CTXSW`. See include/asm-arm/system.h for an 26`__ARCH_WANT_UNLOCKED_CTXSW`. See arch/arm/include/asm/system.h for an
27example. 27example.
28 28
29 29
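
For reference, the two opt-ins described above are plain preprocessor defines
in an architecture's asm/system.h. The two lines below are an illustrative
sketch only (ia64 uses the first, arm uses the second, and per the text the
second implies the first):

#define __ARCH_WANT_UNLOCKED_CTXSW	/* call switch_to() without rq->lock held */
#define __ARCH_WANT_INTERRUPTS_ON_CTXSW	/* ...and with interrupts enabled as well */
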
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 6bd91ed7cd03..7fa8f615ba6e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -99,7 +99,7 @@ config GENERIC_IOMAP
99 bool 99 bool
100 default y 100 default y
101 101
102config SCHED_NO_NO_OMIT_FRAME_POINTER 102config SCHED_OMIT_FRAME_POINTER
103 bool 103 bool
104 default y 104 default y
105 105
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 35bcb641c9e5..a3cc9f65f954 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -55,7 +55,6 @@
55void build_cpu_to_node_map(void); 55void build_cpu_to_node_map(void);
56 56
57#define SD_CPU_INIT (struct sched_domain) { \ 57#define SD_CPU_INIT (struct sched_domain) { \
58 .span = CPU_MASK_NONE, \
59 .parent = NULL, \ 58 .parent = NULL, \
60 .child = NULL, \ 59 .child = NULL, \
61 .groups = NULL, \ 60 .groups = NULL, \
@@ -80,7 +79,6 @@ void build_cpu_to_node_map(void);
80 79
81/* sched_domains SD_NODE_INIT for IA64 NUMA machines */ 80/* sched_domains SD_NODE_INIT for IA64 NUMA machines */
82#define SD_NODE_INIT (struct sched_domain) { \ 81#define SD_NODE_INIT (struct sched_domain) { \
83 .span = CPU_MASK_NONE, \
84 .parent = NULL, \ 82 .parent = NULL, \
85 .child = NULL, \ 83 .child = NULL, \
86 .groups = NULL, \ 84 .groups = NULL, \
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index dbaed4a63815..29047d5c259a 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -273,7 +273,7 @@ config GENERIC_CALIBRATE_DELAY
273 bool 273 bool
274 default y 274 default y
275 275
276config SCHED_NO_NO_OMIT_FRAME_POINTER 276config SCHED_OMIT_FRAME_POINTER
277 bool 277 bool
278 default y 278 default y
279 279
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f4af967a6b30..a5255e7c79e0 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -653,7 +653,7 @@ config GENERIC_CMOS_UPDATE
653 bool 653 bool
654 default y 654 default y
655 655
656config SCHED_NO_NO_OMIT_FRAME_POINTER 656config SCHED_OMIT_FRAME_POINTER
657 bool 657 bool
658 default y 658 default y
659 659
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 7785bec732f2..1fb959f98982 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -37,7 +37,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
37 37
38/* sched_domains SD_NODE_INIT for SGI IP27 machines */ 38/* sched_domains SD_NODE_INIT for SGI IP27 machines */
39#define SD_NODE_INIT (struct sched_domain) { \ 39#define SD_NODE_INIT (struct sched_domain) { \
40 .span = CPU_MASK_NONE, \
41 .parent = NULL, \ 40 .parent = NULL, \
42 .child = NULL, \ 41 .child = NULL, \
43 .groups = NULL, \ 42 .groups = NULL, \
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 525c13a4de93..adb23ea1c1ef 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -141,7 +141,7 @@ config GENERIC_NVRAM
141 bool 141 bool
142 default y if PPC32 142 default y if PPC32
143 143
144config SCHED_NO_NO_OMIT_FRAME_POINTER 144config SCHED_OMIT_FRAME_POINTER
145 bool 145 bool
146 default y 146 default y
147 147
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index c32da6f97999..373fca394a54 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -48,7 +48,6 @@ static inline int pcibus_to_node(struct pci_bus *bus)
48 48
49/* sched_domains SD_NODE_INIT for PPC64 machines */ 49/* sched_domains SD_NODE_INIT for PPC64 machines */
50#define SD_NODE_INIT (struct sched_domain) { \ 50#define SD_NODE_INIT (struct sched_domain) { \
51 .span = CPU_MASK_NONE, \
52 .parent = NULL, \ 51 .parent = NULL, \
53 .child = NULL, \ 52 .child = NULL, \
54 .groups = NULL, \ 53 .groups = NULL, \
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index 95f0085e098a..279d9cc4a007 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -5,7 +5,6 @@
5 5
6/* sched_domains SD_NODE_INIT for sh machines */ 6/* sched_domains SD_NODE_INIT for sh machines */
7#define SD_NODE_INIT (struct sched_domain) { \ 7#define SD_NODE_INIT (struct sched_domain) { \
8 .span = CPU_MASK_NONE, \
9 .parent = NULL, \ 8 .parent = NULL, \
10 .child = NULL, \ 9 .child = NULL, \
11 .groups = NULL, \ 10 .groups = NULL, \
diff --git a/arch/um/include/asm/system.h b/arch/um/include/asm/system.h
index 753346e2cdfd..ae5f94d6317d 100644
--- a/arch/um/include/asm/system.h
+++ b/arch/um/include/asm/system.h
@@ -11,21 +11,21 @@ extern int get_signals(void);
11extern void block_signals(void); 11extern void block_signals(void);
12extern void unblock_signals(void); 12extern void unblock_signals(void);
13 13
14#define local_save_flags(flags) do { typecheck(unsigned long, flags); \ 14#define raw_local_save_flags(flags) do { typecheck(unsigned long, flags); \
15 (flags) = get_signals(); } while(0) 15 (flags) = get_signals(); } while(0)
16#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \ 16#define raw_local_irq_restore(flags) do { typecheck(unsigned long, flags); \
17 set_signals(flags); } while(0) 17 set_signals(flags); } while(0)
18 18
19#define local_irq_save(flags) do { local_save_flags(flags); \ 19#define raw_local_irq_save(flags) do { raw_local_save_flags(flags); \
20 local_irq_disable(); } while(0) 20 raw_local_irq_disable(); } while(0)
21 21
22#define local_irq_enable() unblock_signals() 22#define raw_local_irq_enable() unblock_signals()
23#define local_irq_disable() block_signals() 23#define raw_local_irq_disable() block_signals()
24 24
25#define irqs_disabled() \ 25#define irqs_disabled() \
26({ \ 26({ \
27 unsigned long flags; \ 27 unsigned long flags; \
28 local_save_flags(flags); \ 28 raw_local_save_flags(flags); \
29 (flags == 0); \ 29 (flags == 0); \
30}) 30})
31 31
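
The rename to raw_local_* above matters because the generic irqflags layer
builds the public local_irq_* macros on top of the raw_ variants and, with
irq tracing enabled, notifies lockdep on every transition. The sketch below
approximates that wrapper; it is not part of this patch, and the exact macros
live in include/linux/irqflags.h.

#ifdef CONFIG_TRACE_IRQFLAGS
#define local_irq_disable() \
	do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0)
#define local_irq_enable() \
	do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0)
#else
#define local_irq_disable()	raw_local_irq_disable()
#define local_irq_enable()	raw_local_irq_enable()
#endif
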
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 45c86fb94132..c7235e643aff 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -370,10 +370,10 @@ config X86_RDC321X
370 as R-8610-(G). 370 as R-8610-(G).
371 If you don't have one of these chips, you should say N here. 371 If you don't have one of these chips, you should say N here.
372 372
373config SCHED_NO_NO_OMIT_FRAME_POINTER 373config SCHED_OMIT_FRAME_POINTER
374 def_bool y 374 def_bool y
375 prompt "Single-depth WCHAN output" 375 prompt "Single-depth WCHAN output"
376 depends on X86_32 376 depends on X86
377 help 377 help
378 Calculate simpler /proc/<PID>/wchan values. If this option 378 Calculate simpler /proc/<PID>/wchan values. If this option
379 is disabled then wchan values will recurse back to the 379 is disabled then wchan values will recurse back to the
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 35c54921b2e4..99192bb55a53 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -157,6 +157,7 @@ extern int __get_user_bad(void);
157 int __ret_gu; \ 157 int __ret_gu; \
158 unsigned long __val_gu; \ 158 unsigned long __val_gu; \
159 __chk_user_ptr(ptr); \ 159 __chk_user_ptr(ptr); \
160 might_fault(); \
160 switch (sizeof(*(ptr))) { \ 161 switch (sizeof(*(ptr))) { \
161 case 1: \ 162 case 1: \
162 __get_user_x(1, __ret_gu, __val_gu, ptr); \ 163 __get_user_x(1, __ret_gu, __val_gu, ptr); \
@@ -241,6 +242,7 @@ extern void __put_user_8(void);
241 int __ret_pu; \ 242 int __ret_pu; \
242 __typeof__(*(ptr)) __pu_val; \ 243 __typeof__(*(ptr)) __pu_val; \
243 __chk_user_ptr(ptr); \ 244 __chk_user_ptr(ptr); \
245 might_fault(); \
244 __pu_val = x; \ 246 __pu_val = x; \
245 switch (sizeof(*(ptr))) { \ 247 switch (sizeof(*(ptr))) { \
246 case 1: \ 248 case 1: \
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index d095a3aeea1b..5e06259e90e5 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
82static __always_inline unsigned long __must_check 82static __always_inline unsigned long __must_check
83__copy_to_user(void __user *to, const void *from, unsigned long n) 83__copy_to_user(void __user *to, const void *from, unsigned long n)
84{ 84{
85 might_sleep(); 85 might_fault();
86 return __copy_to_user_inatomic(to, from, n); 86 return __copy_to_user_inatomic(to, from, n);
87} 87}
88 88
89static __always_inline unsigned long 89static __always_inline unsigned long
@@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
137static __always_inline unsigned long 137static __always_inline unsigned long
138__copy_from_user(void *to, const void __user *from, unsigned long n) 138__copy_from_user(void *to, const void __user *from, unsigned long n)
139{ 139{
140 might_sleep(); 140 might_fault();
141 if (__builtin_constant_p(n)) { 141 if (__builtin_constant_p(n)) {
142 unsigned long ret; 142 unsigned long ret;
143 143
@@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
159static __always_inline unsigned long __copy_from_user_nocache(void *to, 159static __always_inline unsigned long __copy_from_user_nocache(void *to,
160 const void __user *from, unsigned long n) 160 const void __user *from, unsigned long n)
161{ 161{
162 might_sleep(); 162 might_fault();
163 if (__builtin_constant_p(n)) { 163 if (__builtin_constant_p(n)) {
164 unsigned long ret; 164 unsigned long ret;
165 165
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index f8cfd00db450..84210c479fca 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -29,6 +29,8 @@ static __always_inline __must_check
29int __copy_from_user(void *dst, const void __user *src, unsigned size) 29int __copy_from_user(void *dst, const void __user *src, unsigned size)
30{ 30{
31 int ret = 0; 31 int ret = 0;
32
33 might_fault();
32 if (!__builtin_constant_p(size)) 34 if (!__builtin_constant_p(size))
33 return copy_user_generic(dst, (__force void *)src, size); 35 return copy_user_generic(dst, (__force void *)src, size);
34 switch (size) { 36 switch (size) {
@@ -71,6 +73,8 @@ static __always_inline __must_check
71int __copy_to_user(void __user *dst, const void *src, unsigned size) 73int __copy_to_user(void __user *dst, const void *src, unsigned size)
72{ 74{
73 int ret = 0; 75 int ret = 0;
76
77 might_fault();
74 if (!__builtin_constant_p(size)) 78 if (!__builtin_constant_p(size))
75 return copy_user_generic((__force void *)dst, src, size); 79 return copy_user_generic((__force void *)dst, src, size);
76 switch (size) { 80 switch (size) {
@@ -113,6 +117,8 @@ static __always_inline __must_check
113int __copy_in_user(void __user *dst, const void __user *src, unsigned size) 117int __copy_in_user(void __user *dst, const void __user *src, unsigned size)
114{ 118{
115 int ret = 0; 119 int ret = 0;
120
121 might_fault();
116 if (!__builtin_constant_p(size)) 122 if (!__builtin_constant_p(size))
117 return copy_user_generic((__force void *)dst, 123 return copy_user_generic((__force void *)dst,
118 (__force void *)src, size); 124 (__force void *)src, size);
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 9e68075544f6..4a20b2f9a381 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon
39#define __do_strncpy_from_user(dst, src, count, res) \ 39#define __do_strncpy_from_user(dst, src, count, res) \
40do { \ 40do { \
41 int __d0, __d1, __d2; \ 41 int __d0, __d1, __d2; \
42 might_sleep(); \ 42 might_fault(); \
43 __asm__ __volatile__( \ 43 __asm__ __volatile__( \
44 " testl %1,%1\n" \ 44 " testl %1,%1\n" \
45 " jz 2f\n" \ 45 " jz 2f\n" \
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user);
126#define __do_clear_user(addr,size) \ 126#define __do_clear_user(addr,size) \
127do { \ 127do { \
128 int __d0; \ 128 int __d0; \
129 might_sleep(); \ 129 might_fault(); \
130 __asm__ __volatile__( \ 130 __asm__ __volatile__( \
131 "0: rep; stosl\n" \ 131 "0: rep; stosl\n" \
132 " movl %2,%0\n" \ 132 " movl %2,%0\n" \
@@ -155,7 +155,7 @@ do { \
155unsigned long 155unsigned long
156clear_user(void __user *to, unsigned long n) 156clear_user(void __user *to, unsigned long n)
157{ 157{
158 might_sleep(); 158 might_fault();
159 if (access_ok(VERIFY_WRITE, to, n)) 159 if (access_ok(VERIFY_WRITE, to, n))
160 __do_clear_user(to, n); 160 __do_clear_user(to, n);
161 return n; 161 return n;
@@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n)
197 unsigned long mask = -__addr_ok(s); 197 unsigned long mask = -__addr_ok(s);
198 unsigned long res, tmp; 198 unsigned long res, tmp;
199 199
200 might_sleep(); 200 might_fault();
201 201
202 __asm__ __volatile__( 202 __asm__ __volatile__(
203 " testl %0, %0\n" 203 " testl %0, %0\n"
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index f4df6e7c718b..64d6c84e6353 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -15,7 +15,7 @@
15#define __do_strncpy_from_user(dst,src,count,res) \ 15#define __do_strncpy_from_user(dst,src,count,res) \
16do { \ 16do { \
17 long __d0, __d1, __d2; \ 17 long __d0, __d1, __d2; \
18 might_sleep(); \ 18 might_fault(); \
19 __asm__ __volatile__( \ 19 __asm__ __volatile__( \
20 " testq %1,%1\n" \ 20 " testq %1,%1\n" \
21 " jz 2f\n" \ 21 " jz 2f\n" \
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user);
64unsigned long __clear_user(void __user *addr, unsigned long size) 64unsigned long __clear_user(void __user *addr, unsigned long size)
65{ 65{
66 long __d0; 66 long __d0;
67 might_sleep(); 67 might_fault();
68 /* no memory constraint because it doesn't change any memory gcc knows 68 /* no memory constraint because it doesn't change any memory gcc knows
69 about */ 69 about */
70 asm volatile( 70 asm volatile(
diff --git a/include/asm-m32r/system.h b/include/asm-m32r/system.h
index 70a57c8c002b..c980f5ba8de7 100644
--- a/include/asm-m32r/system.h
+++ b/include/asm-m32r/system.h
@@ -23,7 +23,7 @@
23 */ 23 */
24 24
25#if defined(CONFIG_FRAME_POINTER) || \ 25#if defined(CONFIG_FRAME_POINTER) || \
26 !defined(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER) 26 !defined(CONFIG_SCHED_OMIT_FRAME_POINTER)
27#define M32R_PUSH_FP " push fp\n" 27#define M32R_PUSH_FP " push fp\n"
28#define M32R_POP_FP " pop fp\n" 28#define M32R_POP_FP " pop fp\n"
29#else 29#else
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 4aaa4afb1cb9..096476f1fb35 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -17,7 +17,7 @@ extern int debug_locks_off(void);
17({ \ 17({ \
18 int __ret = 0; \ 18 int __ret = 0; \
19 \ 19 \
20 if (unlikely(c)) { \ 20 if (!oops_in_progress && unlikely(c)) { \
21 if (debug_locks_off() && !debug_locks_silent) \ 21 if (debug_locks_off() && !debug_locks_silent) \
22 WARN_ON(1); \ 22 WARN_ON(1); \
23 __ret = 1; \ 23 __ret = 1; \
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 586ab56a3ec3..8f627b9ae2b1 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -164,6 +164,8 @@ union futex_key {
164 } both; 164 } both;
165}; 165};
166 166
167#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
168
167#ifdef CONFIG_FUTEX 169#ifdef CONFIG_FUTEX
168extern void exit_robust_list(struct task_struct *curr); 170extern void exit_robust_list(struct task_struct *curr);
169extern void exit_pi_state_list(struct task_struct *curr); 171extern void exit_pi_state_list(struct task_struct *curr);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index dc7e0d0a6474..269df5a17b30 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -141,6 +141,15 @@ extern int _cond_resched(void);
141 (__x < 0) ? -__x : __x; \ 141 (__x < 0) ? -__x : __x; \
142 }) 142 })
143 143
144#ifdef CONFIG_PROVE_LOCKING
145void might_fault(void);
146#else
147static inline void might_fault(void)
148{
149 might_sleep();
150}
151#endif
152
144extern struct atomic_notifier_head panic_notifier_list; 153extern struct atomic_notifier_head panic_notifier_list;
145extern long (*panic_blink)(long time); 154extern long (*panic_blink)(long time);
146NORET_TYPE void panic(const char * fmt, ...) 155NORET_TYPE void panic(const char * fmt, ...)
@@ -188,6 +197,8 @@ extern unsigned long long memparse(const char *ptr, char **retptr);
188extern int core_kernel_text(unsigned long addr); 197extern int core_kernel_text(unsigned long addr);
189extern int __kernel_text_address(unsigned long addr); 198extern int __kernel_text_address(unsigned long addr);
190extern int kernel_text_address(unsigned long addr); 199extern int kernel_text_address(unsigned long addr);
200extern int func_ptr_is_kernel_text(void *ptr);
201
191struct pid; 202struct pid;
192extern struct pid *session_of_pgrp(struct pid *pgrp); 203extern struct pid *session_of_pgrp(struct pid *pgrp);
193 204
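
The kernel.h hunk above only declares might_fault() for the
CONFIG_PROVE_LOCKING case; the matching definition is part of the mm/memory.c
change listed in the diffstat but not reproduced in this excerpt. The sketch
below is an approximation of the idea, not the actual hunk: a user-space
access may sleep and may take mmap_sem for reading, so tell lockdep about it
up front via might_lock_read() (added to lockdep.h further down).

void might_fault(void)
{
	might_sleep();

	/*
	 * Contexts running under pagefault_disable() or without an mm
	 * (kernel threads) never take mmap_sem here.
	 */
	if (!in_atomic() && current->mm)
		might_lock_read(&current->mm->mmap_sem);
}
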
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 29aec6e10020..8956daf64abd 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -73,6 +73,8 @@ struct lock_class_key {
73 struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES]; 73 struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
74}; 74};
75 75
76#define LOCKSTAT_POINTS 4
77
76/* 78/*
77 * The lock-class itself: 79 * The lock-class itself:
78 */ 80 */
@@ -119,7 +121,8 @@ struct lock_class {
119 int name_version; 121 int name_version;
120 122
121#ifdef CONFIG_LOCK_STAT 123#ifdef CONFIG_LOCK_STAT
122 unsigned long contention_point[4]; 124 unsigned long contention_point[LOCKSTAT_POINTS];
125 unsigned long contending_point[LOCKSTAT_POINTS];
123#endif 126#endif
124}; 127};
125 128
@@ -144,6 +147,7 @@ enum bounce_type {
144 147
145struct lock_class_stats { 148struct lock_class_stats {
146 unsigned long contention_point[4]; 149 unsigned long contention_point[4];
150 unsigned long contending_point[4];
147 struct lock_time read_waittime; 151 struct lock_time read_waittime;
148 struct lock_time write_waittime; 152 struct lock_time write_waittime;
149 struct lock_time read_holdtime; 153 struct lock_time read_holdtime;
@@ -165,6 +169,7 @@ struct lockdep_map {
165 const char *name; 169 const char *name;
166#ifdef CONFIG_LOCK_STAT 170#ifdef CONFIG_LOCK_STAT
167 int cpu; 171 int cpu;
172 unsigned long ip;
168#endif 173#endif
169}; 174};
170 175
@@ -356,7 +361,7 @@ struct lock_class_key { };
356#ifdef CONFIG_LOCK_STAT 361#ifdef CONFIG_LOCK_STAT
357 362
358extern void lock_contended(struct lockdep_map *lock, unsigned long ip); 363extern void lock_contended(struct lockdep_map *lock, unsigned long ip);
359extern void lock_acquired(struct lockdep_map *lock); 364extern void lock_acquired(struct lockdep_map *lock, unsigned long ip);
360 365
361#define LOCK_CONTENDED(_lock, try, lock) \ 366#define LOCK_CONTENDED(_lock, try, lock) \
362do { \ 367do { \
@@ -364,13 +369,13 @@ do { \
364 lock_contended(&(_lock)->dep_map, _RET_IP_); \ 369 lock_contended(&(_lock)->dep_map, _RET_IP_); \
365 lock(_lock); \ 370 lock(_lock); \
366 } \ 371 } \
367 lock_acquired(&(_lock)->dep_map); \ 372 lock_acquired(&(_lock)->dep_map, _RET_IP_); \
368} while (0) 373} while (0)
369 374
370#else /* CONFIG_LOCK_STAT */ 375#else /* CONFIG_LOCK_STAT */
371 376
372#define lock_contended(lockdep_map, ip) do {} while (0) 377#define lock_contended(lockdep_map, ip) do {} while (0)
373#define lock_acquired(lockdep_map) do {} while (0) 378#define lock_acquired(lockdep_map, ip) do {} while (0)
374 379
375#define LOCK_CONTENDED(_lock, try, lock) \ 380#define LOCK_CONTENDED(_lock, try, lock) \
376 lock(_lock) 381 lock(_lock)
@@ -481,4 +486,22 @@ static inline void print_irqtrace_events(struct task_struct *curr)
481# define lock_map_release(l) do { } while (0) 486# define lock_map_release(l) do { } while (0)
482#endif 487#endif
483 488
489#ifdef CONFIG_PROVE_LOCKING
490# define might_lock(lock) \
491do { \
492 typecheck(struct lockdep_map *, &(lock)->dep_map); \
493 lock_acquire(&(lock)->dep_map, 0, 0, 0, 2, NULL, _THIS_IP_); \
494 lock_release(&(lock)->dep_map, 0, _THIS_IP_); \
495} while (0)
496# define might_lock_read(lock) \
497do { \
498 typecheck(struct lockdep_map *, &(lock)->dep_map); \
499 lock_acquire(&(lock)->dep_map, 0, 0, 1, 2, NULL, _THIS_IP_); \
500 lock_release(&(lock)->dep_map, 0, _THIS_IP_); \
501} while (0)
502#else
503# define might_lock(lock) do { } while (0)
504# define might_lock_read(lock) do { } while (0)
505#endif
506
484#endif /* __LINUX_LOCKDEP_H */ 507#endif /* __LINUX_LOCKDEP_H */
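
A usage sketch for the new might_lock_read() annotation follows; the struct
cache type and its fields are made up, and nothing below is part of the
patch. The point is that a function whose slow path takes a lock can declare
that up front, so lockdep validates the lock ordering on every call rather
than only when the slow path actually triggers.

#include <linux/lockdep.h>
#include <linux/rwsem.h>

struct cache {
	struct rw_semaphore sem;	/* protects value while it is stale */
	int stale;
	int value;
};

static int cache_read(struct cache *c)
{
	int v;

	might_lock_read(&c->sem);	/* we *may* take c->sem below */

	if (!c->stale)
		return c->value;	/* fast path: lock never taken */

	down_read(&c->sem);
	v = c->value;
	up_read(&c->sem);
	return v;
}
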
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index bc6da10ceee0..7a0e5c4f8072 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -144,6 +144,8 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
144/* 144/*
145 * NOTE: mutex_trylock() follows the spin_trylock() convention, 145 * NOTE: mutex_trylock() follows the spin_trylock() convention,
146 * not the down_trylock() convention! 146 * not the down_trylock() convention!
147 *
148 * Returns 1 if the mutex has been acquired successfully, and 0 on contention.
147 */ 149 */
148extern int mutex_trylock(struct mutex *lock); 150extern int mutex_trylock(struct mutex *lock);
149extern void mutex_unlock(struct mutex *lock); 151extern void mutex_unlock(struct mutex *lock);
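
The added comment documents the return convention; a short illustration
follows (struct my_device and quick_update() are made-up names):

#include <linux/mutex.h>

struct my_device {
	struct mutex lock;
	int state;
};

static void quick_update(struct my_device *dev)
{
	dev->state++;			/* stand-in for real work */
}

static void poll_device(struct my_device *dev)
{
	if (mutex_trylock(&dev->lock)) {	/* 1: we now own dev->lock */
		quick_update(dev);
		mutex_unlock(&dev->lock);
	}
	/* 0: contended -- do NOT unlock; just skip this round */
}
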
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 5f89b62e6983..301dda829e37 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -41,7 +41,7 @@
41#include <linux/seqlock.h> 41#include <linux/seqlock.h>
42 42
43#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 43#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
44#define RCU_SECONDS_TILL_STALL_CHECK ( 3 * HZ) /* for rcp->jiffies_stall */ 44#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rcp->jiffies_stall */
45#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */ 45#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rcp->jiffies_stall */
46#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 46#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
47 47
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4b81fc5f7731..423830b6e6e9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -250,7 +250,7 @@ extern void init_idle_bootup_task(struct task_struct *idle);
250extern int runqueue_is_locked(void); 250extern int runqueue_is_locked(void);
251extern void task_rq_unlock_wait(struct task_struct *p); 251extern void task_rq_unlock_wait(struct task_struct *p);
252 252
253extern cpumask_t nohz_cpu_mask; 253extern cpumask_var_t nohz_cpu_mask;
254#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) 254#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
255extern int select_nohz_load_balancer(int cpu); 255extern int select_nohz_load_balancer(int cpu);
256#else 256#else
@@ -260,8 +260,6 @@ static inline int select_nohz_load_balancer(int cpu)
260} 260}
261#endif 261#endif
262 262
263extern unsigned long rt_needs_cpu(int cpu);
264
265/* 263/*
266 * Only dump TASK_* tasks. (0 for all tasks) 264 * Only dump TASK_* tasks. (0 for all tasks)
267 */ 265 */
@@ -778,7 +776,6 @@ enum cpu_idle_type {
778 776
779struct sched_group { 777struct sched_group {
780 struct sched_group *next; /* Must be a circular list */ 778 struct sched_group *next; /* Must be a circular list */
781 cpumask_t cpumask;
782 779
783 /* 780 /*
784 * CPU power of this group, SCHED_LOAD_SCALE being max power for a 781 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
@@ -791,8 +788,15 @@ struct sched_group {
791 * (see include/linux/reciprocal_div.h) 788 * (see include/linux/reciprocal_div.h)
792 */ 789 */
793 u32 reciprocal_cpu_power; 790 u32 reciprocal_cpu_power;
791
792 unsigned long cpumask[];
794}; 793};
795 794
795static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
796{
797 return to_cpumask(sg->cpumask);
798}
799
796enum sched_domain_level { 800enum sched_domain_level {
797 SD_LV_NONE = 0, 801 SD_LV_NONE = 0,
798 SD_LV_SIBLING, 802 SD_LV_SIBLING,
@@ -816,7 +820,6 @@ struct sched_domain {
816 struct sched_domain *parent; /* top domain must be null terminated */ 820 struct sched_domain *parent; /* top domain must be null terminated */
817 struct sched_domain *child; /* bottom domain must be null terminated */ 821 struct sched_domain *child; /* bottom domain must be null terminated */
818 struct sched_group *groups; /* the balancing groups of the domain */ 822 struct sched_group *groups; /* the balancing groups of the domain */
819 cpumask_t span; /* span of all CPUs in this domain */
820 unsigned long min_interval; /* Minimum balance interval ms */ 823 unsigned long min_interval; /* Minimum balance interval ms */
821 unsigned long max_interval; /* Maximum balance interval ms */ 824 unsigned long max_interval; /* Maximum balance interval ms */
822 unsigned int busy_factor; /* less balancing by factor if busy */ 825 unsigned int busy_factor; /* less balancing by factor if busy */
@@ -871,9 +874,17 @@ struct sched_domain {
871#ifdef CONFIG_SCHED_DEBUG 874#ifdef CONFIG_SCHED_DEBUG
872 char *name; 875 char *name;
873#endif 876#endif
877
878 /* span of all CPUs in this domain */
879 unsigned long span[];
874}; 880};
875 881
876extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, 882static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
883{
884 return to_cpumask(sd->span);
885}
886
887extern void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
877 struct sched_domain_attr *dattr_new); 888 struct sched_domain_attr *dattr_new);
878extern int arch_reinit_sched_domains(void); 889extern int arch_reinit_sched_domains(void);
879 890
@@ -882,7 +893,7 @@ extern int arch_reinit_sched_domains(void);
882struct sched_domain_attr; 893struct sched_domain_attr;
883 894
884static inline void 895static inline void
885partition_sched_domains(int ndoms_new, cpumask_t *doms_new, 896partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
886 struct sched_domain_attr *dattr_new) 897 struct sched_domain_attr *dattr_new)
887{ 898{
888} 899}
@@ -964,7 +975,7 @@ struct sched_class {
964 void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); 975 void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
965 976
966 void (*set_cpus_allowed)(struct task_struct *p, 977 void (*set_cpus_allowed)(struct task_struct *p,
967 const cpumask_t *newmask); 978 const struct cpumask *newmask);
968 979
969 void (*rq_online)(struct rq *rq); 980 void (*rq_online)(struct rq *rq);
970 void (*rq_offline)(struct rq *rq); 981 void (*rq_offline)(struct rq *rq);
@@ -1624,12 +1635,12 @@ extern cputime_t task_gtime(struct task_struct *p);
1624 1635
1625#ifdef CONFIG_SMP 1636#ifdef CONFIG_SMP
1626extern int set_cpus_allowed_ptr(struct task_struct *p, 1637extern int set_cpus_allowed_ptr(struct task_struct *p,
1627 const cpumask_t *new_mask); 1638 const struct cpumask *new_mask);
1628#else 1639#else
1629static inline int set_cpus_allowed_ptr(struct task_struct *p, 1640static inline int set_cpus_allowed_ptr(struct task_struct *p,
1630 const cpumask_t *new_mask) 1641 const struct cpumask *new_mask)
1631{ 1642{
1632 if (!cpu_isset(0, *new_mask)) 1643 if (!cpumask_test_cpu(0, new_mask))
1633 return -EINVAL; 1644 return -EINVAL;
1634 return 0; 1645 return 0;
1635} 1646}
@@ -2242,8 +2253,8 @@ __trace_special(void *__tr, void *__data,
2242} 2253}
2243#endif 2254#endif
2244 2255
2245extern long sched_setaffinity(pid_t pid, const cpumask_t *new_mask); 2256extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
2246extern long sched_getaffinity(pid_t pid, cpumask_t *mask); 2257extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
2247 2258
2248extern int sched_mc_power_savings, sched_smt_power_savings; 2259extern int sched_mc_power_savings, sched_smt_power_savings;
2249 2260
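
With the cpumask_t members gone, sched_group and sched_domain now end in a
flexible array that sched_group_cpus()/sched_domain_span() wrap, so an
allocation has to reserve cpumask_size() extra bytes behind the structure.
The real call sites are in the kernel/sched.c hunk (part of this commit but
not shown in this excerpt); the function below is only a sketch of the idiom,
and alloc_sched_group() is an invented name.

#include <linux/cpumask.h>
#include <linux/sched.h>
#include <linux/slab.h>

static struct sched_group *alloc_sched_group(int cpu)
{
	struct sched_group *sg;

	/* reserve room for the embedded cpumask behind the struct */
	sg = kzalloc(sizeof(*sg) + cpumask_size(), GFP_KERNEL);
	if (!sg)
		return NULL;

	/* kzalloc() zeroed the mask, so just mark this group's CPU */
	cpumask_set_cpu(cpu, sched_group_cpus(sg));
	return sg;
}
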
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index fec6decfb983..6b58367d145e 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -78,7 +78,7 @@ static inline unsigned long __copy_from_user_nocache(void *to,
78 \ 78 \
79 set_fs(KERNEL_DS); \ 79 set_fs(KERNEL_DS); \
80 pagefault_disable(); \ 80 pagefault_disable(); \
81 ret = __get_user(retval, (__force typeof(retval) __user *)(addr)); \ 81 ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \
82 pagefault_enable(); \ 82 pagefault_enable(); \
83 set_fs(old_fs); \ 83 set_fs(old_fs); \
84 ret; \ 84 ret; \
diff --git a/kernel/Makefile b/kernel/Makefile
index 19fad003b19d..6a212b842d86 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -19,7 +19,6 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
19CFLAGS_REMOVE_rtmutex-debug.o = -pg 19CFLAGS_REMOVE_rtmutex-debug.o = -pg
20CFLAGS_REMOVE_cgroup-debug.o = -pg 20CFLAGS_REMOVE_cgroup-debug.o = -pg
21CFLAGS_REMOVE_sched_clock.o = -pg 21CFLAGS_REMOVE_sched_clock.o = -pg
22CFLAGS_REMOVE_sched.o = -pg
23endif 22endif
24 23
25obj-$(CONFIG_FREEZER) += freezer.o 24obj-$(CONFIG_FREEZER) += freezer.o
@@ -90,7 +89,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += trace/
90obj-$(CONFIG_TRACING) += trace/ 89obj-$(CONFIG_TRACING) += trace/
91obj-$(CONFIG_SMP) += sched_cpupri.o 90obj-$(CONFIG_SMP) += sched_cpupri.o
92 91
93ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y) 92ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
94# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is 93# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
95# needed for x86 only. Why this used to be enabled for all architectures is beyond 94# needed for x86 only. Why this used to be enabled for all architectures is beyond
96# me. I suspect most platforms don't need this, but until we know that for sure 95# me. I suspect most platforms don't need this, but until we know that for sure
diff --git a/kernel/exit.c b/kernel/exit.c
index e5ae36ebe8af..61ba5b4b10cf 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1324,10 +1324,10 @@ static int wait_task_zombie(struct task_struct *p, int options,
1324 * group, which consolidates times for all threads in the 1324 * group, which consolidates times for all threads in the
1325 * group including the group leader. 1325 * group including the group leader.
1326 */ 1326 */
1327 thread_group_cputime(p, &cputime);
1327 spin_lock_irq(&p->parent->sighand->siglock); 1328 spin_lock_irq(&p->parent->sighand->siglock);
1328 psig = p->parent->signal; 1329 psig = p->parent->signal;
1329 sig = p->signal; 1330 sig = p->signal;
1330 thread_group_cputime(p, &cputime);
1331 psig->cutime = 1331 psig->cutime =
1332 cputime_add(psig->cutime, 1332 cputime_add(psig->cutime,
1333 cputime_add(cputime.utime, 1333 cputime_add(cputime.utime,
diff --git a/kernel/extable.c b/kernel/extable.c
index feb0317cf09a..e136ed8d82ba 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -67,3 +67,19 @@ int kernel_text_address(unsigned long addr)
67 return 1; 67 return 1;
68 return module_text_address(addr) != NULL; 68 return module_text_address(addr) != NULL;
69} 69}
70
71/*
72 * On some architectures (PPC64, IA64) function pointers
73 * are actually only tokens to some data that then holds the
74 * real function address. As a result, to find if a function
75 * pointer is part of the kernel text, we need to do some
76 * special dereferencing first.
77 */
78int func_ptr_is_kernel_text(void *ptr)
79{
80 unsigned long addr;
81 addr = (unsigned long) dereference_function_descriptor(ptr);
82 if (core_kernel_text(addr))
83 return 1;
84 return module_text_address(addr) != NULL;
85}
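
func_ptr_is_kernel_text() exists so that callers validating function pointers
keep working on PPC64/IA64, where a function pointer is a descriptor rather
than a text address. In this commit the consumer is kernel/notifier.c (listed
in the diffstat, not reproduced in this excerpt); the check below is only a
sketch of that kind of use, and register_checked() is an invented wrapper.

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/notifier.h>

static int register_checked(struct atomic_notifier_head *nh,
			    struct notifier_block *nb)
{
	/* reject callbacks that do not point into kernel or module text */
	if (!func_ptr_is_kernel_text((void *)nb->notifier_call)) {
		WARN(1, "notifier callback is not in kernel text\n");
		return -EINVAL;
	}
	return atomic_notifier_chain_register(nh, nb);
}
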
diff --git a/kernel/futex.c b/kernel/futex.c
index 8af10027514b..e10c5c8786a6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -123,24 +123,6 @@ struct futex_hash_bucket {
123static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS]; 123static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
124 124
125/* 125/*
126 * Take mm->mmap_sem, when futex is shared
127 */
128static inline void futex_lock_mm(struct rw_semaphore *fshared)
129{
130 if (fshared)
131 down_read(fshared);
132}
133
134/*
135 * Release mm->mmap_sem, when the futex is shared
136 */
137static inline void futex_unlock_mm(struct rw_semaphore *fshared)
138{
139 if (fshared)
140 up_read(fshared);
141}
142
143/*
144 * We hash on the keys returned from get_futex_key (see below). 126 * We hash on the keys returned from get_futex_key (see below).
145 */ 127 */
146static struct futex_hash_bucket *hash_futex(union futex_key *key) 128static struct futex_hash_bucket *hash_futex(union futex_key *key)
@@ -161,6 +143,45 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
161 && key1->both.offset == key2->both.offset); 143 && key1->both.offset == key2->both.offset);
162} 144}
163 145
146/*
147 * Take a reference to the resource addressed by a key.
148 * Can be called while holding spinlocks.
149 *
150 */
151static void get_futex_key_refs(union futex_key *key)
152{
153 if (!key->both.ptr)
154 return;
155
156 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
157 case FUT_OFF_INODE:
158 atomic_inc(&key->shared.inode->i_count);
159 break;
160 case FUT_OFF_MMSHARED:
161 atomic_inc(&key->private.mm->mm_count);
162 break;
163 }
164}
165
166/*
167 * Drop a reference to the resource addressed by a key.
168 * The hash bucket spinlock must not be held.
169 */
170static void drop_futex_key_refs(union futex_key *key)
171{
172 if (!key->both.ptr)
173 return;
174
175 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
176 case FUT_OFF_INODE:
177 iput(key->shared.inode);
178 break;
179 case FUT_OFF_MMSHARED:
180 mmdrop(key->private.mm);
181 break;
182 }
183}
184
164/** 185/**
165 * get_futex_key - Get parameters which are the keys for a futex. 186 * get_futex_key - Get parameters which are the keys for a futex.
166 * @uaddr: virtual address of the futex 187 * @uaddr: virtual address of the futex
@@ -179,12 +200,10 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
179 * For other futexes, it points to &current->mm->mmap_sem and 200 * For other futexes, it points to &current->mm->mmap_sem and
180 * caller must have taken the reader lock. but NOT any spinlocks. 201 * caller must have taken the reader lock. but NOT any spinlocks.
181 */ 202 */
182static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared, 203static int get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
183 union futex_key *key)
184{ 204{
185 unsigned long address = (unsigned long)uaddr; 205 unsigned long address = (unsigned long)uaddr;
186 struct mm_struct *mm = current->mm; 206 struct mm_struct *mm = current->mm;
187 struct vm_area_struct *vma;
188 struct page *page; 207 struct page *page;
189 int err; 208 int err;
190 209
@@ -208,100 +227,50 @@ static int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
208 return -EFAULT; 227 return -EFAULT;
209 key->private.mm = mm; 228 key->private.mm = mm;
210 key->private.address = address; 229 key->private.address = address;
230 get_futex_key_refs(key);
211 return 0; 231 return 0;
212 } 232 }
213 /*
214 * The futex is hashed differently depending on whether
215 * it's in a shared or private mapping. So check vma first.
216 */
217 vma = find_extend_vma(mm, address);
218 if (unlikely(!vma))
219 return -EFAULT;
220 233
221 /* 234again:
222 * Permissions. 235 err = get_user_pages_fast(address, 1, 0, &page);
223 */ 236 if (err < 0)
224 if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ)) 237 return err;
225 return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES; 238
239 lock_page(page);
240 if (!page->mapping) {
241 unlock_page(page);
242 put_page(page);
243 goto again;
244 }
226 245
227 /* 246 /*
228 * Private mappings are handled in a simple way. 247 * Private mappings are handled in a simple way.
229 * 248 *
230 * NOTE: When userspace waits on a MAP_SHARED mapping, even if 249 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
231 * it's a read-only handle, it's expected that futexes attach to 250 * it's a read-only handle, it's expected that futexes attach to
232 * the object not the particular process. Therefore we use 251 * the object not the particular process.
233 * VM_MAYSHARE here, not VM_SHARED which is restricted to shared
234 * mappings of _writable_ handles.
235 */ 252 */
236 if (likely(!(vma->vm_flags & VM_MAYSHARE))) { 253 if (PageAnon(page)) {
237 key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */ 254 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
238 key->private.mm = mm; 255 key->private.mm = mm;
239 key->private.address = address; 256 key->private.address = address;
240 return 0; 257 } else {
258 key->both.offset |= FUT_OFF_INODE; /* inode-based key */
259 key->shared.inode = page->mapping->host;
260 key->shared.pgoff = page->index;
241 } 261 }
242 262
243 /* 263 get_futex_key_refs(key);
244 * Linear file mappings are also simple.
245 */
246 key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
247 key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
248 if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
249 key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
250 + vma->vm_pgoff);
251 return 0;
252 }
253 264
254 /* 265 unlock_page(page);
255 * We could walk the page table to read the non-linear 266 put_page(page);
256 * pte, and get the page index without fetching the page 267 return 0;
257 * from swap. But that's a lot of code to duplicate here
258 * for a rare case, so we simply fetch the page.
259 */
260 err = get_user_pages(current, mm, address, 1, 0, 0, &page, NULL);
261 if (err >= 0) {
262 key->shared.pgoff =
263 page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
264 put_page(page);
265 return 0;
266 }
267 return err;
268}
269
270/*
271 * Take a reference to the resource addressed by a key.
272 * Can be called while holding spinlocks.
273 *
274 */
275static void get_futex_key_refs(union futex_key *key)
276{
277 if (key->both.ptr == NULL)
278 return;
279 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
280 case FUT_OFF_INODE:
281 atomic_inc(&key->shared.inode->i_count);
282 break;
283 case FUT_OFF_MMSHARED:
284 atomic_inc(&key->private.mm->mm_count);
285 break;
286 }
287} 268}
288 269
289/* 270static inline
290 * Drop a reference to the resource addressed by a key. 271void put_futex_key(int fshared, union futex_key *key)
291 * The hash bucket spinlock must not be held.
292 */
293static void drop_futex_key_refs(union futex_key *key)
294{ 272{
295 if (!key->both.ptr) 273 drop_futex_key_refs(key);
296 return;
297 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
298 case FUT_OFF_INODE:
299 iput(key->shared.inode);
300 break;
301 case FUT_OFF_MMSHARED:
302 mmdrop(key->private.mm);
303 break;
304 }
305} 274}
306 275
307static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) 276static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
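
With mmap_sem no longer held across futex operations, the key returned by
get_futex_key() is what pins the backing object (the mm for private futexes,
the inode for shared ones), and every caller must drop that reference on its
way out. The skeleton below condenses the new calling convention visible in
the futex_wake()/futex_wake_op()/futex_requeue()/futex_wait() hunks that
follow; futex_op_skeleton() is an invented name and the body is reduced to
the key handling.

static int futex_op_skeleton(u32 __user *uaddr, int fshared)
{
	union futex_key key = FUTEX_KEY_INIT;
	int ret;

	ret = get_futex_key(uaddr, fshared, &key);	/* takes mm/inode ref */
	if (unlikely(ret != 0))
		return ret;

	/* ... hash the key, lock the bucket, do the real wake/wait ... */

	put_futex_key(fshared, &key);			/* drops that ref */
	return ret;
}
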
@@ -328,10 +297,8 @@ static int get_futex_value_locked(u32 *dest, u32 __user *from)
328 297
329/* 298/*
330 * Fault handling. 299 * Fault handling.
331 * if fshared is non NULL, current->mm->mmap_sem is already held
332 */ 300 */
333static int futex_handle_fault(unsigned long address, 301static int futex_handle_fault(unsigned long address, int attempt)
334 struct rw_semaphore *fshared, int attempt)
335{ 302{
336 struct vm_area_struct * vma; 303 struct vm_area_struct * vma;
337 struct mm_struct *mm = current->mm; 304 struct mm_struct *mm = current->mm;
@@ -340,8 +307,7 @@ static int futex_handle_fault(unsigned long address,
340 if (attempt > 2) 307 if (attempt > 2)
341 return ret; 308 return ret;
342 309
343 if (!fshared) 310 down_read(&mm->mmap_sem);
344 down_read(&mm->mmap_sem);
345 vma = find_vma(mm, address); 311 vma = find_vma(mm, address);
346 if (vma && address >= vma->vm_start && 312 if (vma && address >= vma->vm_start &&
347 (vma->vm_flags & VM_WRITE)) { 313 (vma->vm_flags & VM_WRITE)) {
@@ -361,8 +327,7 @@ static int futex_handle_fault(unsigned long address,
361 current->min_flt++; 327 current->min_flt++;
362 } 328 }
363 } 329 }
364 if (!fshared) 330 up_read(&mm->mmap_sem);
365 up_read(&mm->mmap_sem);
366 return ret; 331 return ret;
367} 332}
368 333
@@ -385,6 +350,7 @@ static int refill_pi_state_cache(void)
385 /* pi_mutex gets initialized later */ 350 /* pi_mutex gets initialized later */
386 pi_state->owner = NULL; 351 pi_state->owner = NULL;
387 atomic_set(&pi_state->refcount, 1); 352 atomic_set(&pi_state->refcount, 1);
353 pi_state->key = FUTEX_KEY_INIT;
388 354
389 current->pi_state_cache = pi_state; 355 current->pi_state_cache = pi_state;
390 356
@@ -462,7 +428,7 @@ void exit_pi_state_list(struct task_struct *curr)
462 struct list_head *next, *head = &curr->pi_state_list; 428 struct list_head *next, *head = &curr->pi_state_list;
463 struct futex_pi_state *pi_state; 429 struct futex_pi_state *pi_state;
464 struct futex_hash_bucket *hb; 430 struct futex_hash_bucket *hb;
465 union futex_key key; 431 union futex_key key = FUTEX_KEY_INIT;
466 432
467 if (!futex_cmpxchg_enabled) 433 if (!futex_cmpxchg_enabled)
468 return; 434 return;
@@ -719,20 +685,17 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
719 * Wake up all waiters hashed on the physical page that is mapped 685 * Wake up all waiters hashed on the physical page that is mapped
720 * to this virtual address: 686 * to this virtual address:
721 */ 687 */
722static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared, 688static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
723 int nr_wake, u32 bitset)
724{ 689{
725 struct futex_hash_bucket *hb; 690 struct futex_hash_bucket *hb;
726 struct futex_q *this, *next; 691 struct futex_q *this, *next;
727 struct plist_head *head; 692 struct plist_head *head;
728 union futex_key key; 693 union futex_key key = FUTEX_KEY_INIT;
729 int ret; 694 int ret;
730 695
731 if (!bitset) 696 if (!bitset)
732 return -EINVAL; 697 return -EINVAL;
733 698
734 futex_lock_mm(fshared);
735
736 ret = get_futex_key(uaddr, fshared, &key); 699 ret = get_futex_key(uaddr, fshared, &key);
737 if (unlikely(ret != 0)) 700 if (unlikely(ret != 0))
738 goto out; 701 goto out;
@@ -760,7 +723,7 @@ static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
760 723
761 spin_unlock(&hb->lock); 724 spin_unlock(&hb->lock);
762out: 725out:
763 futex_unlock_mm(fshared); 726 put_futex_key(fshared, &key);
764 return ret; 727 return ret;
765} 728}
766 729
@@ -769,19 +732,16 @@ out:
769 * to this virtual address: 732 * to this virtual address:
770 */ 733 */
771static int 734static int
772futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared, 735futex_wake_op(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
773 u32 __user *uaddr2,
774 int nr_wake, int nr_wake2, int op) 736 int nr_wake, int nr_wake2, int op)
775{ 737{
776 union futex_key key1, key2; 738 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
777 struct futex_hash_bucket *hb1, *hb2; 739 struct futex_hash_bucket *hb1, *hb2;
778 struct plist_head *head; 740 struct plist_head *head;
779 struct futex_q *this, *next; 741 struct futex_q *this, *next;
780 int ret, op_ret, attempt = 0; 742 int ret, op_ret, attempt = 0;
781 743
782retryfull: 744retryfull:
783 futex_lock_mm(fshared);
784
785 ret = get_futex_key(uaddr1, fshared, &key1); 745 ret = get_futex_key(uaddr1, fshared, &key1);
786 if (unlikely(ret != 0)) 746 if (unlikely(ret != 0))
787 goto out; 747 goto out;
@@ -826,18 +786,12 @@ retry:
826 */ 786 */
827 if (attempt++) { 787 if (attempt++) {
828 ret = futex_handle_fault((unsigned long)uaddr2, 788 ret = futex_handle_fault((unsigned long)uaddr2,
829 fshared, attempt); 789 attempt);
830 if (ret) 790 if (ret)
831 goto out; 791 goto out;
832 goto retry; 792 goto retry;
833 } 793 }
834 794
835 /*
836 * If we would have faulted, release mmap_sem,
837 * fault it in and start all over again.
838 */
839 futex_unlock_mm(fshared);
840
841 ret = get_user(dummy, uaddr2); 795 ret = get_user(dummy, uaddr2);
842 if (ret) 796 if (ret)
843 return ret; 797 return ret;
@@ -873,7 +827,8 @@ retry:
873 if (hb1 != hb2) 827 if (hb1 != hb2)
874 spin_unlock(&hb2->lock); 828 spin_unlock(&hb2->lock);
875out: 829out:
876 futex_unlock_mm(fshared); 830 put_futex_key(fshared, &key2);
831 put_futex_key(fshared, &key1);
877 832
878 return ret; 833 return ret;
879} 834}
@@ -882,19 +837,16 @@ out:
882 * Requeue all waiters hashed on one physical page to another 837 * Requeue all waiters hashed on one physical page to another
883 * physical page. 838 * physical page.
884 */ 839 */
885static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared, 840static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
886 u32 __user *uaddr2,
887 int nr_wake, int nr_requeue, u32 *cmpval) 841 int nr_wake, int nr_requeue, u32 *cmpval)
888{ 842{
889 union futex_key key1, key2; 843 union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
890 struct futex_hash_bucket *hb1, *hb2; 844 struct futex_hash_bucket *hb1, *hb2;
891 struct plist_head *head1; 845 struct plist_head *head1;
892 struct futex_q *this, *next; 846 struct futex_q *this, *next;
893 int ret, drop_count = 0; 847 int ret, drop_count = 0;
894 848
895 retry: 849 retry:
896 futex_lock_mm(fshared);
897
898 ret = get_futex_key(uaddr1, fshared, &key1); 850 ret = get_futex_key(uaddr1, fshared, &key1);
899 if (unlikely(ret != 0)) 851 if (unlikely(ret != 0))
900 goto out; 852 goto out;
@@ -917,12 +869,6 @@ static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
917 if (hb1 != hb2) 869 if (hb1 != hb2)
918 spin_unlock(&hb2->lock); 870 spin_unlock(&hb2->lock);
919 871
920 /*
921 * If we would have faulted, release mmap_sem, fault
922 * it in and start all over again.
923 */
924 futex_unlock_mm(fshared);
925
926 ret = get_user(curval, uaddr1); 872 ret = get_user(curval, uaddr1);
927 873
928 if (!ret) 874 if (!ret)
@@ -974,7 +920,8 @@ out_unlock:
974 drop_futex_key_refs(&key1); 920 drop_futex_key_refs(&key1);
975 921
976out: 922out:
977 futex_unlock_mm(fshared); 923 put_futex_key(fshared, &key2);
924 put_futex_key(fshared, &key1);
978 return ret; 925 return ret;
979} 926}
980 927
@@ -1096,8 +1043,7 @@ static void unqueue_me_pi(struct futex_q *q)
1096 * private futexes. 1043 * private futexes.
1097 */ 1044 */
1098static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, 1045static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1099 struct task_struct *newowner, 1046 struct task_struct *newowner, int fshared)
1100 struct rw_semaphore *fshared)
1101{ 1047{
1102 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; 1048 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1103 struct futex_pi_state *pi_state = q->pi_state; 1049 struct futex_pi_state *pi_state = q->pi_state;
@@ -1176,7 +1122,7 @@ retry:
1176handle_fault: 1122handle_fault:
1177 spin_unlock(q->lock_ptr); 1123 spin_unlock(q->lock_ptr);
1178 1124
1179 ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++); 1125 ret = futex_handle_fault((unsigned long)uaddr, attempt++);
1180 1126
1181 spin_lock(q->lock_ptr); 1127 spin_lock(q->lock_ptr);
1182 1128
@@ -1200,7 +1146,7 @@ handle_fault:
1200 1146
1201static long futex_wait_restart(struct restart_block *restart); 1147static long futex_wait_restart(struct restart_block *restart);
1202 1148
1203static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared, 1149static int futex_wait(u32 __user *uaddr, int fshared,
1204 u32 val, ktime_t *abs_time, u32 bitset) 1150 u32 val, ktime_t *abs_time, u32 bitset)
1205{ 1151{
1206 struct task_struct *curr = current; 1152 struct task_struct *curr = current;
@@ -1218,8 +1164,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1218 q.pi_state = NULL; 1164 q.pi_state = NULL;
1219 q.bitset = bitset; 1165 q.bitset = bitset;
1220 retry: 1166 retry:
1221 futex_lock_mm(fshared); 1167 q.key = FUTEX_KEY_INIT;
1222
1223 ret = get_futex_key(uaddr, fshared, &q.key); 1168 ret = get_futex_key(uaddr, fshared, &q.key);
1224 if (unlikely(ret != 0)) 1169 if (unlikely(ret != 0))
1225 goto out_release_sem; 1170 goto out_release_sem;
@@ -1251,12 +1196,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1251 if (unlikely(ret)) { 1196 if (unlikely(ret)) {
1252 queue_unlock(&q, hb); 1197 queue_unlock(&q, hb);
1253 1198
1254 /*
1255 * If we would have faulted, release mmap_sem, fault it in and
1256 * start all over again.
1257 */
1258 futex_unlock_mm(fshared);
1259
1260 ret = get_user(uval, uaddr); 1199 ret = get_user(uval, uaddr);
1261 1200
1262 if (!ret) 1201 if (!ret)
@@ -1271,12 +1210,6 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1271 queue_me(&q, hb); 1210 queue_me(&q, hb);
1272 1211
1273 /* 1212 /*
1274 * Now the futex is queued and we have checked the data, we
1275 * don't want to hold mmap_sem while we sleep.
1276 */
1277 futex_unlock_mm(fshared);
1278
1279 /*
1280 * There might have been scheduling since the queue_me(), as we 1213 * There might have been scheduling since the queue_me(), as we
1281 * cannot hold a spinlock across the get_user() in case it 1214 * cannot hold a spinlock across the get_user() in case it
1282 * faults, and we cannot just set TASK_INTERRUPTIBLE state when 1215 * faults, and we cannot just set TASK_INTERRUPTIBLE state when
@@ -1363,7 +1296,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1363 queue_unlock(&q, hb); 1296 queue_unlock(&q, hb);
1364 1297
1365 out_release_sem: 1298 out_release_sem:
1366 futex_unlock_mm(fshared); 1299 put_futex_key(fshared, &q.key);
1367 return ret; 1300 return ret;
1368} 1301}
1369 1302
@@ -1371,13 +1304,13 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
1371static long futex_wait_restart(struct restart_block *restart) 1304static long futex_wait_restart(struct restart_block *restart)
1372{ 1305{
1373 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; 1306 u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
1374 struct rw_semaphore *fshared = NULL; 1307 int fshared = 0;
1375 ktime_t t; 1308 ktime_t t;
1376 1309
1377 t.tv64 = restart->futex.time; 1310 t.tv64 = restart->futex.time;
1378 restart->fn = do_no_restart_syscall; 1311 restart->fn = do_no_restart_syscall;
1379 if (restart->futex.flags & FLAGS_SHARED) 1312 if (restart->futex.flags & FLAGS_SHARED)
1380 fshared = &current->mm->mmap_sem; 1313 fshared = 1;
1381 return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, 1314 return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
1382 restart->futex.bitset); 1315 restart->futex.bitset);
1383} 1316}
@@ -1389,7 +1322,7 @@ static long futex_wait_restart(struct restart_block *restart)
1389 * if there are waiters then it will block, it does PI, etc. (Due to 1322 * if there are waiters then it will block, it does PI, etc. (Due to
1390 * races the kernel might see a 0 value of the futex too.) 1323 * races the kernel might see a 0 value of the futex too.)
1391 */ 1324 */
1392static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared, 1325static int futex_lock_pi(u32 __user *uaddr, int fshared,
1393 int detect, ktime_t *time, int trylock) 1326 int detect, ktime_t *time, int trylock)
1394{ 1327{
1395 struct hrtimer_sleeper timeout, *to = NULL; 1328 struct hrtimer_sleeper timeout, *to = NULL;
@@ -1412,8 +1345,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1412 1345
1413 q.pi_state = NULL; 1346 q.pi_state = NULL;
1414 retry: 1347 retry:
1415 futex_lock_mm(fshared); 1348 q.key = FUTEX_KEY_INIT;
1416
1417 ret = get_futex_key(uaddr, fshared, &q.key); 1349 ret = get_futex_key(uaddr, fshared, &q.key);
1418 if (unlikely(ret != 0)) 1350 if (unlikely(ret != 0))
1419 goto out_release_sem; 1351 goto out_release_sem;
@@ -1502,7 +1434,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1502 * exit to complete. 1434 * exit to complete.
1503 */ 1435 */
1504 queue_unlock(&q, hb); 1436 queue_unlock(&q, hb);
1505 futex_unlock_mm(fshared);
1506 cond_resched(); 1437 cond_resched();
1507 goto retry; 1438 goto retry;
1508 1439
@@ -1534,12 +1465,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1534 */ 1465 */
1535 queue_me(&q, hb); 1466 queue_me(&q, hb);
1536 1467
1537 /*
1538 * Now the futex is queued and we have checked the data, we
1539 * don't want to hold mmap_sem while we sleep.
1540 */
1541 futex_unlock_mm(fshared);
1542
1543 WARN_ON(!q.pi_state); 1468 WARN_ON(!q.pi_state);
1544 /* 1469 /*
1545 * Block on the PI mutex: 1470 * Block on the PI mutex:
@@ -1552,7 +1477,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1552 ret = ret ? 0 : -EWOULDBLOCK; 1477 ret = ret ? 0 : -EWOULDBLOCK;
1553 } 1478 }
1554 1479
1555 futex_lock_mm(fshared);
1556 spin_lock(q.lock_ptr); 1480 spin_lock(q.lock_ptr);
1557 1481
1558 if (!ret) { 1482 if (!ret) {
@@ -1618,7 +1542,6 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1618 1542
1619 /* Unqueue and drop the lock */ 1543 /* Unqueue and drop the lock */
1620 unqueue_me_pi(&q); 1544 unqueue_me_pi(&q);
1621 futex_unlock_mm(fshared);
1622 1545
1623 if (to) 1546 if (to)
1624 destroy_hrtimer_on_stack(&to->timer); 1547 destroy_hrtimer_on_stack(&to->timer);
@@ -1628,7 +1551,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1628 queue_unlock(&q, hb); 1551 queue_unlock(&q, hb);
1629 1552
1630 out_release_sem: 1553 out_release_sem:
1631 futex_unlock_mm(fshared); 1554 put_futex_key(fshared, &q.key);
1632 if (to) 1555 if (to)
1633 destroy_hrtimer_on_stack(&to->timer); 1556 destroy_hrtimer_on_stack(&to->timer);
1634 return ret; 1557 return ret;
@@ -1645,15 +1568,12 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1645 queue_unlock(&q, hb); 1568 queue_unlock(&q, hb);
1646 1569
1647 if (attempt++) { 1570 if (attempt++) {
1648 ret = futex_handle_fault((unsigned long)uaddr, fshared, 1571 ret = futex_handle_fault((unsigned long)uaddr, attempt);
1649 attempt);
1650 if (ret) 1572 if (ret)
1651 goto out_release_sem; 1573 goto out_release_sem;
1652 goto retry_unlocked; 1574 goto retry_unlocked;
1653 } 1575 }
1654 1576
1655 futex_unlock_mm(fshared);
1656
1657 ret = get_user(uval, uaddr); 1577 ret = get_user(uval, uaddr);
1658 if (!ret && (uval != -EFAULT)) 1578 if (!ret && (uval != -EFAULT))
1659 goto retry; 1579 goto retry;
@@ -1668,13 +1588,13 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1668 * This is the in-kernel slowpath: we look up the PI state (if any), 1588 * This is the in-kernel slowpath: we look up the PI state (if any),
1669 * and do the rt-mutex unlock. 1589 * and do the rt-mutex unlock.
1670 */ 1590 */
1671static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared) 1591static int futex_unlock_pi(u32 __user *uaddr, int fshared)
1672{ 1592{
1673 struct futex_hash_bucket *hb; 1593 struct futex_hash_bucket *hb;
1674 struct futex_q *this, *next; 1594 struct futex_q *this, *next;
1675 u32 uval; 1595 u32 uval;
1676 struct plist_head *head; 1596 struct plist_head *head;
1677 union futex_key key; 1597 union futex_key key = FUTEX_KEY_INIT;
1678 int ret, attempt = 0; 1598 int ret, attempt = 0;
1679 1599
1680retry: 1600retry:
@@ -1685,10 +1605,6 @@ retry:
1685 */ 1605 */
1686 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current)) 1606 if ((uval & FUTEX_TID_MASK) != task_pid_vnr(current))
1687 return -EPERM; 1607 return -EPERM;
1688 /*
1689 * First take all the futex related locks:
1690 */
1691 futex_lock_mm(fshared);
1692 1608
1693 ret = get_futex_key(uaddr, fshared, &key); 1609 ret = get_futex_key(uaddr, fshared, &key);
1694 if (unlikely(ret != 0)) 1610 if (unlikely(ret != 0))
@@ -1747,7 +1663,7 @@ retry_unlocked:
1747out_unlock: 1663out_unlock:
1748 spin_unlock(&hb->lock); 1664 spin_unlock(&hb->lock);
1749out: 1665out:
1750 futex_unlock_mm(fshared); 1666 put_futex_key(fshared, &key);
1751 1667
1752 return ret; 1668 return ret;
1753 1669
@@ -1763,16 +1679,13 @@ pi_faulted:
1763 spin_unlock(&hb->lock); 1679 spin_unlock(&hb->lock);
1764 1680
1765 if (attempt++) { 1681 if (attempt++) {
1766 ret = futex_handle_fault((unsigned long)uaddr, fshared, 1682 ret = futex_handle_fault((unsigned long)uaddr, attempt);
1767 attempt);
1768 if (ret) 1683 if (ret)
1769 goto out; 1684 goto out;
1770 uval = 0; 1685 uval = 0;
1771 goto retry_unlocked; 1686 goto retry_unlocked;
1772 } 1687 }
1773 1688
1774 futex_unlock_mm(fshared);
1775
1776 ret = get_user(uval, uaddr); 1689 ret = get_user(uval, uaddr);
1777 if (!ret && (uval != -EFAULT)) 1690 if (!ret && (uval != -EFAULT))
1778 goto retry; 1691 goto retry;
@@ -1898,8 +1811,7 @@ retry:
1898 * PI futexes happens in exit_pi_state(): 1811 * PI futexes happens in exit_pi_state():
1899 */ 1812 */
1900 if (!pi && (uval & FUTEX_WAITERS)) 1813 if (!pi && (uval & FUTEX_WAITERS))
1901 futex_wake(uaddr, &curr->mm->mmap_sem, 1, 1814 futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY);
1902 FUTEX_BITSET_MATCH_ANY);
1903 } 1815 }
1904 return 0; 1816 return 0;
1905} 1817}
@@ -1995,10 +1907,10 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
1995{ 1907{
1996 int ret = -ENOSYS; 1908 int ret = -ENOSYS;
1997 int cmd = op & FUTEX_CMD_MASK; 1909 int cmd = op & FUTEX_CMD_MASK;
1998 struct rw_semaphore *fshared = NULL; 1910 int fshared = 0;
1999 1911
2000 if (!(op & FUTEX_PRIVATE_FLAG)) 1912 if (!(op & FUTEX_PRIVATE_FLAG))
2001 fshared = &current->mm->mmap_sem; 1913 fshared = 1;
2002 1914
2003 switch (cmd) { 1915 switch (cmd) {
2004 case FUTEX_WAIT: 1916 case FUTEX_WAIT:
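[Editor's note] The futex.c hunks above replace the mmap_sem-based "struct rw_semaphore *fshared" argument with a plain int flag, drop futex_lock_mm()/futex_unlock_mm(), and instead initialize every key with FUTEX_KEY_INIT and release it with put_futex_key() on all exit paths. A minimal sketch of that acquire/release discipline, written as ordinary user-space C with hypothetical helpers (keyref_get()/keyref_put() are stand-ins, not the kernel functions):

#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's futex key handling. */
struct keyref { void *ref; };
#define KEYREF_INIT { .ref = NULL }

static int keyref_get(int shared, struct keyref *k) { (void)shared; k->ref = k; return 0; }
static void keyref_put(int shared, struct keyref *k) { (void)shared; k->ref = NULL; }
static int do_work(struct keyref *k) { (void)k; return 0; }

static int with_key(int shared)
{
	struct keyref k = KEYREF_INIT;	/* analogous to key = FUTEX_KEY_INIT */
	int ret;

	ret = keyref_get(shared, &k);	/* analogous to get_futex_key() */
	if (ret)
		return ret;		/* key never taken, nothing to drop */

	ret = do_work(&k);

	keyref_put(shared, &k);		/* analogous to put_futex_key(), on every path */
	return ret;
}

int main(void)
{
	printf("with_key -> %d\n", with_key(1));
	return 0;
}

The point of the shape is that the reference is paired with the key itself rather than with a process-wide semaphore, which is why the diff can delete the mmap_sem lock/unlock calls around the fault-retry paths.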
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 74b1878b8bb8..c4c7df23f8c7 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -137,16 +137,16 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
137#ifdef CONFIG_LOCK_STAT 137#ifdef CONFIG_LOCK_STAT
138static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats); 138static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], lock_stats);
139 139
140static int lock_contention_point(struct lock_class *class, unsigned long ip) 140static int lock_point(unsigned long points[], unsigned long ip)
141{ 141{
142 int i; 142 int i;
143 143
144 for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { 144 for (i = 0; i < LOCKSTAT_POINTS; i++) {
145 if (class->contention_point[i] == 0) { 145 if (points[i] == 0) {
146 class->contention_point[i] = ip; 146 points[i] = ip;
147 break; 147 break;
148 } 148 }
149 if (class->contention_point[i] == ip) 149 if (points[i] == ip)
150 break; 150 break;
151 } 151 }
152 152
@@ -186,6 +186,9 @@ struct lock_class_stats lock_stats(struct lock_class *class)
186 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++) 186 for (i = 0; i < ARRAY_SIZE(stats.contention_point); i++)
187 stats.contention_point[i] += pcs->contention_point[i]; 187 stats.contention_point[i] += pcs->contention_point[i];
188 188
189 for (i = 0; i < ARRAY_SIZE(stats.contending_point); i++)
190 stats.contending_point[i] += pcs->contending_point[i];
191
189 lock_time_add(&pcs->read_waittime, &stats.read_waittime); 192 lock_time_add(&pcs->read_waittime, &stats.read_waittime);
190 lock_time_add(&pcs->write_waittime, &stats.write_waittime); 193 lock_time_add(&pcs->write_waittime, &stats.write_waittime);
191 194
@@ -210,6 +213,7 @@ void clear_lock_stats(struct lock_class *class)
210 memset(cpu_stats, 0, sizeof(struct lock_class_stats)); 213 memset(cpu_stats, 0, sizeof(struct lock_class_stats));
211 } 214 }
212 memset(class->contention_point, 0, sizeof(class->contention_point)); 215 memset(class->contention_point, 0, sizeof(class->contention_point));
216 memset(class->contending_point, 0, sizeof(class->contending_point));
213} 217}
214 218
215static struct lock_class_stats *get_lock_stats(struct lock_class *class) 219static struct lock_class_stats *get_lock_stats(struct lock_class *class)
@@ -3000,7 +3004,7 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
3000 struct held_lock *hlock, *prev_hlock; 3004 struct held_lock *hlock, *prev_hlock;
3001 struct lock_class_stats *stats; 3005 struct lock_class_stats *stats;
3002 unsigned int depth; 3006 unsigned int depth;
3003 int i, point; 3007 int i, contention_point, contending_point;
3004 3008
3005 depth = curr->lockdep_depth; 3009 depth = curr->lockdep_depth;
3006 if (DEBUG_LOCKS_WARN_ON(!depth)) 3010 if (DEBUG_LOCKS_WARN_ON(!depth))
@@ -3024,18 +3028,22 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip)
3024found_it: 3028found_it:
3025 hlock->waittime_stamp = sched_clock(); 3029 hlock->waittime_stamp = sched_clock();
3026 3030
3027 point = lock_contention_point(hlock_class(hlock), ip); 3031 contention_point = lock_point(hlock_class(hlock)->contention_point, ip);
3032 contending_point = lock_point(hlock_class(hlock)->contending_point,
3033 lock->ip);
3028 3034
3029 stats = get_lock_stats(hlock_class(hlock)); 3035 stats = get_lock_stats(hlock_class(hlock));
3030 if (point < ARRAY_SIZE(stats->contention_point)) 3036 if (contention_point < LOCKSTAT_POINTS)
3031 stats->contention_point[point]++; 3037 stats->contention_point[contention_point]++;
3038 if (contending_point < LOCKSTAT_POINTS)
3039 stats->contending_point[contending_point]++;
3032 if (lock->cpu != smp_processor_id()) 3040 if (lock->cpu != smp_processor_id())
3033 stats->bounces[bounce_contended + !!hlock->read]++; 3041 stats->bounces[bounce_contended + !!hlock->read]++;
3034 put_lock_stats(stats); 3042 put_lock_stats(stats);
3035} 3043}
3036 3044
3037static void 3045static void
3038__lock_acquired(struct lockdep_map *lock) 3046__lock_acquired(struct lockdep_map *lock, unsigned long ip)
3039{ 3047{
3040 struct task_struct *curr = current; 3048 struct task_struct *curr = current;
3041 struct held_lock *hlock, *prev_hlock; 3049 struct held_lock *hlock, *prev_hlock;
@@ -3084,6 +3092,7 @@ found_it:
3084 put_lock_stats(stats); 3092 put_lock_stats(stats);
3085 3093
3086 lock->cpu = cpu; 3094 lock->cpu = cpu;
3095 lock->ip = ip;
3087} 3096}
3088 3097
3089void lock_contended(struct lockdep_map *lock, unsigned long ip) 3098void lock_contended(struct lockdep_map *lock, unsigned long ip)
@@ -3105,7 +3114,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
3105} 3114}
3106EXPORT_SYMBOL_GPL(lock_contended); 3115EXPORT_SYMBOL_GPL(lock_contended);
3107 3116
3108void lock_acquired(struct lockdep_map *lock) 3117void lock_acquired(struct lockdep_map *lock, unsigned long ip)
3109{ 3118{
3110 unsigned long flags; 3119 unsigned long flags;
3111 3120
@@ -3118,7 +3127,7 @@ void lock_acquired(struct lockdep_map *lock)
3118 raw_local_irq_save(flags); 3127 raw_local_irq_save(flags);
3119 check_flags(flags); 3128 check_flags(flags);
3120 current->lockdep_recursion = 1; 3129 current->lockdep_recursion = 1;
3121 __lock_acquired(lock); 3130 __lock_acquired(lock, ip);
3122 current->lockdep_recursion = 0; 3131 current->lockdep_recursion = 0;
3123 raw_local_irq_restore(flags); 3132 raw_local_irq_restore(flags);
3124} 3133}
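[Editor's note] The lockdep.c change generalizes the single contention_point[] table into a lock_point() helper used for two arrays: contention_point[] records where a waiter blocked, while the new contending_point[] records the acquisition site (lock->ip) it was blocked behind, which is why lock_acquired() now takes an ip argument. A standalone, simplified rendering of the lock_point() bookkeeping (outside the kernel, fixed-size table only):

#include <stdio.h>

#define LOCKSTAT_POINTS 4

/* Remember up to LOCKSTAT_POINTS distinct instruction pointers and return
 * the slot used; returns LOCKSTAT_POINTS when the table is full. */
static int lock_point(unsigned long points[], unsigned long ip)
{
	int i;

	for (i = 0; i < LOCKSTAT_POINTS; i++) {
		if (points[i] == 0) {		/* free slot: claim it */
			points[i] = ip;
			break;
		}
		if (points[i] == ip)		/* already recorded */
			break;
	}
	return i;
}

int main(void)
{
	unsigned long contention_point[LOCKSTAT_POINTS] = { 0 };
	unsigned long counts[LOCKSTAT_POINTS] = { 0 };
	int slot = lock_point(contention_point, 0xc0ffee);

	if (slot < LOCKSTAT_POINTS) {
		counts[slot]++;		/* mirrors stats->contention_point[slot]++ */
		printf("slot %d ip %#lx count %lu\n",
		       slot, contention_point[slot], counts[slot]);
	}
	return 0;
}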
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 20dbcbf9c7dd..13716b813896 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -470,11 +470,12 @@ static void seq_line(struct seq_file *m, char c, int offset, int length)
470 470
471static void snprint_time(char *buf, size_t bufsiz, s64 nr) 471static void snprint_time(char *buf, size_t bufsiz, s64 nr)
472{ 472{
473 unsigned long rem; 473 s64 div;
474 s32 rem;
474 475
475 nr += 5; /* for display rounding */ 476 nr += 5; /* for display rounding */
476 rem = do_div(nr, 1000); /* XXX: do_div_signed */ 477 div = div_s64_rem(nr, 1000, &rem);
477 snprintf(buf, bufsiz, "%lld.%02d", (long long)nr, (int)rem/10); 478 snprintf(buf, bufsiz, "%lld.%02d", (long long)div, (int)rem/10);
478} 479}
479 480
480static void seq_time(struct seq_file *m, s64 time) 481static void seq_time(struct seq_file *m, s64 time)
@@ -556,7 +557,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
556 if (stats->read_holdtime.nr) 557 if (stats->read_holdtime.nr)
557 namelen += 2; 558 namelen += 2;
558 559
559 for (i = 0; i < ARRAY_SIZE(class->contention_point); i++) { 560 for (i = 0; i < LOCKSTAT_POINTS; i++) {
560 char sym[KSYM_SYMBOL_LEN]; 561 char sym[KSYM_SYMBOL_LEN];
561 char ip[32]; 562 char ip[32];
562 563
@@ -573,6 +574,23 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
573 stats->contention_point[i], 574 stats->contention_point[i],
574 ip, sym); 575 ip, sym);
575 } 576 }
577 for (i = 0; i < LOCKSTAT_POINTS; i++) {
578 char sym[KSYM_SYMBOL_LEN];
579 char ip[32];
580
581 if (class->contending_point[i] == 0)
582 break;
583
584 if (!i)
585 seq_line(m, '-', 40-namelen, namelen);
586
587 sprint_symbol(sym, class->contending_point[i]);
588 snprintf(ip, sizeof(ip), "[<%p>]",
589 (void *)class->contending_point[i]);
590 seq_printf(m, "%40s %14lu %29s %s\n", name,
591 stats->contending_point[i],
592 ip, sym);
593 }
576 if (i) { 594 if (i) {
577 seq_puts(m, "\n"); 595 seq_puts(m, "\n");
578 seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1)); 596 seq_line(m, '.', 0, 40 + 1 + 10 * (14 + 1));
@@ -582,7 +600,7 @@ static void seq_stats(struct seq_file *m, struct lock_stat_data *data)
582 600
583static void seq_header(struct seq_file *m) 601static void seq_header(struct seq_file *m)
584{ 602{
585 seq_printf(m, "lock_stat version 0.2\n"); 603 seq_printf(m, "lock_stat version 0.3\n");
586 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); 604 seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1));
587 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " 605 seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s "
588 "%14s %14s\n", 606 "%14s %14s\n",
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 12c779dc65d4..4f45d4b658ef 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -59,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init);
59 * We also put the fastpath first in the kernel image, to make sure the 59 * We also put the fastpath first in the kernel image, to make sure the
60 * branch is predicted by the CPU as default-untaken. 60 * branch is predicted by the CPU as default-untaken.
61 */ 61 */
62static void noinline __sched 62static __used noinline void __sched
63__mutex_lock_slowpath(atomic_t *lock_count); 63__mutex_lock_slowpath(atomic_t *lock_count);
64 64
65/*** 65/***
@@ -96,7 +96,7 @@ void inline __sched mutex_lock(struct mutex *lock)
96EXPORT_SYMBOL(mutex_lock); 96EXPORT_SYMBOL(mutex_lock);
97#endif 97#endif
98 98
99static noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); 99static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count);
100 100
101/*** 101/***
102 * mutex_unlock - release the mutex 102 * mutex_unlock - release the mutex
@@ -184,7 +184,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
184 } 184 }
185 185
186done: 186done:
187 lock_acquired(&lock->dep_map); 187 lock_acquired(&lock->dep_map, ip);
188 /* got the lock - rejoice! */ 188 /* got the lock - rejoice! */
189 mutex_remove_waiter(lock, &waiter, task_thread_info(task)); 189 mutex_remove_waiter(lock, &waiter, task_thread_info(task));
190 debug_mutex_set_owner(lock, task_thread_info(task)); 190 debug_mutex_set_owner(lock, task_thread_info(task));
@@ -268,7 +268,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested)
268/* 268/*
269 * Release the lock, slowpath: 269 * Release the lock, slowpath:
270 */ 270 */
271static noinline void 271static __used noinline void
272__mutex_unlock_slowpath(atomic_t *lock_count) 272__mutex_unlock_slowpath(atomic_t *lock_count)
273{ 273{
274 __mutex_unlock_common_slowpath(lock_count, 1); 274 __mutex_unlock_common_slowpath(lock_count, 1);
@@ -313,7 +313,7 @@ int __sched mutex_lock_killable(struct mutex *lock)
313} 313}
314EXPORT_SYMBOL(mutex_lock_killable); 314EXPORT_SYMBOL(mutex_lock_killable);
315 315
316static noinline void __sched 316static __used noinline void __sched
317__mutex_lock_slowpath(atomic_t *lock_count) 317__mutex_lock_slowpath(atomic_t *lock_count)
318{ 318{
319 struct mutex *lock = container_of(lock_count, struct mutex, count); 319 struct mutex *lock = container_of(lock_count, struct mutex, count);
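[Editor's note] The only change to mutex.c above is adding __used to the noinline slowpath functions: they are reached from the asm fastpath stubs rather than from visible C callers, so without the annotation the compiler may discard them as dead code. An illustrative, compiler-level demonstration of the attribute (GCC/Clang syntax; not kernel code):

#include <stdio.h>

/* __attribute__((used)) forces the compiler to emit a static function even
 * though it sees no C-level caller, which is what the kernel's __used macro
 * expands to. */
static __attribute__((used)) void keep_me(void)
{
	puts("kept despite having no visible caller");
}

int main(void)
{
	/* Deliberately does not call keep_me(); inspect the object file with
	 * `nm` to confirm the symbol was still emitted. */
	return 0;
}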
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 4282c0a40a57..61d5aa5eced3 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -82,6 +82,14 @@ static int __kprobes notifier_call_chain(struct notifier_block **nl,
82 82
83 while (nb && nr_to_call) { 83 while (nb && nr_to_call) {
84 next_nb = rcu_dereference(nb->next); 84 next_nb = rcu_dereference(nb->next);
85
86#ifdef CONFIG_DEBUG_NOTIFIERS
87 if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {
88 WARN(1, "Invalid notifier called!");
89 nb = next_nb;
90 continue;
91 }
92#endif
85 ret = nb->notifier_call(nb, val, v); 93 ret = nb->notifier_call(nb, val, v);
86 94
87 if (nr_calls) 95 if (nr_calls)
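[Editor's note] The CONFIG_DEBUG_NOTIFIERS block added above validates each callback with func_ptr_is_kernel_text() before invoking it, warning and skipping the entry when the pointer does not look like kernel code. The shape of that defensive walk, sketched in plain C with a hypothetical validity predicate (looks_valid() is a stand-in, not the kernel helper):

#include <stdio.h>

struct notifier { struct notifier *next; int (*call)(int event); };

/* Hypothetical stand-in for func_ptr_is_kernel_text(): reject NULL only,
 * purely to show where the check sits in the chain walk. */
static int looks_valid(int (*fn)(int)) { return fn != NULL; }

static void call_chain(struct notifier *nb, int event)
{
	while (nb) {
		struct notifier *next = nb->next;

		if (!looks_valid(nb->call)) {
			fprintf(stderr, "invalid notifier skipped\n"); /* WARN() analogue */
			nb = next;
			continue;
		}
		nb->call(event);
		nb = next;
	}
}

static int hello(int event) { printf("notified: %d\n", event); return 0; }

int main(void)
{
	struct notifier bad  = { .next = NULL, .call = NULL };
	struct notifier good = { .next = &bad, .call = hello };

	call_chain(&good, 42);
	return 0;
}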
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 895337b16a24..3f4377e0aa04 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -58,21 +58,21 @@ void thread_group_cputime(
58 struct task_struct *tsk, 58 struct task_struct *tsk,
59 struct task_cputime *times) 59 struct task_cputime *times)
60{ 60{
61 struct signal_struct *sig; 61 struct task_cputime *totals, *tot;
62 int i; 62 int i;
63 struct task_cputime *tot;
64 63
65 sig = tsk->signal; 64 totals = tsk->signal->cputime.totals;
66 if (unlikely(!sig) || !sig->cputime.totals) { 65 if (!totals) {
67 times->utime = tsk->utime; 66 times->utime = tsk->utime;
68 times->stime = tsk->stime; 67 times->stime = tsk->stime;
69 times->sum_exec_runtime = tsk->se.sum_exec_runtime; 68 times->sum_exec_runtime = tsk->se.sum_exec_runtime;
70 return; 69 return;
71 } 70 }
71
72 times->stime = times->utime = cputime_zero; 72 times->stime = times->utime = cputime_zero;
73 times->sum_exec_runtime = 0; 73 times->sum_exec_runtime = 0;
74 for_each_possible_cpu(i) { 74 for_each_possible_cpu(i) {
75 tot = per_cpu_ptr(tsk->signal->cputime.totals, i); 75 tot = per_cpu_ptr(totals, i);
76 times->utime = cputime_add(times->utime, tot->utime); 76 times->utime = cputime_add(times->utime, tot->utime);
77 times->stime = cputime_add(times->stime, tot->stime); 77 times->stime = cputime_add(times->stime, tot->stime);
78 times->sum_exec_runtime += tot->sum_exec_runtime; 78 times->sum_exec_runtime += tot->sum_exec_runtime;
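[Editor's note] thread_group_cputime() above now reads tsk->signal->cputime.totals once, falls back to the task's own counters when no per-CPU totals exist, and otherwise sums the per-CPU slots. The accumulation itself is a plain reduction; a user-space sketch of the same shape (hypothetical types, a flat array standing in for per_cpu_ptr()):

#include <stdio.h>

#define NR_CPUS 4

struct cputime { unsigned long long utime, stime, sum_exec_runtime; };

/* Sum per-CPU accounting slots into one total, mirroring the
 * for_each_possible_cpu() loop above. */
static void sum_cputime(const struct cputime per_cpu[], struct cputime *out)
{
	int i;

	out->utime = out->stime = out->sum_exec_runtime = 0;
	for (i = 0; i < NR_CPUS; i++) {
		out->utime += per_cpu[i].utime;
		out->stime += per_cpu[i].stime;
		out->sum_exec_runtime += per_cpu[i].sum_exec_runtime;
	}
}

int main(void)
{
	struct cputime per_cpu[NR_CPUS] = { {1, 2, 3}, {4, 5, 6}, {0, 0, 0}, {7, 8, 9} };
	struct cputime total;

	sum_cputime(per_cpu, &total);
	printf("utime=%llu stime=%llu runtime=%llu\n",
	       total.utime, total.stime, total.sum_exec_runtime);
	return 0;
}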
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 37f72e551542..c03ca3e61919 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -191,7 +191,7 @@ static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
191 191
192 /* OK, time to rat on our buddy... */ 192 /* OK, time to rat on our buddy... */
193 193
194 printk(KERN_ERR "RCU detected CPU stalls:"); 194 printk(KERN_ERR "INFO: RCU detected CPU stalls:");
195 for_each_possible_cpu(cpu) { 195 for_each_possible_cpu(cpu) {
196 if (cpu_isset(cpu, rcp->cpumask)) 196 if (cpu_isset(cpu, rcp->cpumask))
197 printk(" %d", cpu); 197 printk(" %d", cpu);
@@ -204,7 +204,7 @@ static void print_cpu_stall(struct rcu_ctrlblk *rcp)
204{ 204{
205 unsigned long flags; 205 unsigned long flags;
206 206
207 printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n", 207 printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
208 smp_processor_id(), jiffies, 208 smp_processor_id(), jiffies,
209 jiffies - rcp->gp_start); 209 jiffies - rcp->gp_start);
210 dump_stack(); 210 dump_stack();
@@ -393,7 +393,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
393 * unnecessarily. 393 * unnecessarily.
394 */ 394 */
395 smp_mb(); 395 smp_mb();
396 cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask); 396 cpumask_andnot(&rcp->cpumask, cpu_online_mask, nohz_cpu_mask);
397 397
398 rcp->signaled = 0; 398 rcp->signaled = 0;
399 } 399 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 7729c4bbc8ba..4ed9f588faa6 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -487,14 +487,14 @@ struct rt_rq {
487 */ 487 */
488struct root_domain { 488struct root_domain {
489 atomic_t refcount; 489 atomic_t refcount;
490 cpumask_t span; 490 cpumask_var_t span;
491 cpumask_t online; 491 cpumask_var_t online;
492 492
493 /* 493 /*
494 * The "RT overload" flag: it gets set if a CPU has more than 494 * The "RT overload" flag: it gets set if a CPU has more than
495 * one runnable RT task. 495 * one runnable RT task.
496 */ 496 */
497 cpumask_t rto_mask; 497 cpumask_var_t rto_mask;
498 atomic_t rto_count; 498 atomic_t rto_count;
499#ifdef CONFIG_SMP 499#ifdef CONFIG_SMP
500 struct cpupri cpupri; 500 struct cpupri cpupri;
@@ -709,45 +709,18 @@ static __read_mostly char *sched_feat_names[] = {
709 709
710#undef SCHED_FEAT 710#undef SCHED_FEAT
711 711
712static int sched_feat_open(struct inode *inode, struct file *filp) 712static int sched_feat_show(struct seq_file *m, void *v)
713{
714 filp->private_data = inode->i_private;
715 return 0;
716}
717
718static ssize_t
719sched_feat_read(struct file *filp, char __user *ubuf,
720 size_t cnt, loff_t *ppos)
721{ 713{
722 char *buf;
723 int r = 0;
724 int len = 0;
725 int i; 714 int i;
726 715
727 for (i = 0; sched_feat_names[i]; i++) { 716 for (i = 0; sched_feat_names[i]; i++) {
728 len += strlen(sched_feat_names[i]); 717 if (!(sysctl_sched_features & (1UL << i)))
729 len += 4; 718 seq_puts(m, "NO_");
730 } 719 seq_printf(m, "%s ", sched_feat_names[i]);
731
732 buf = kmalloc(len + 2, GFP_KERNEL);
733 if (!buf)
734 return -ENOMEM;
735
736 for (i = 0; sched_feat_names[i]; i++) {
737 if (sysctl_sched_features & (1UL << i))
738 r += sprintf(buf + r, "%s ", sched_feat_names[i]);
739 else
740 r += sprintf(buf + r, "NO_%s ", sched_feat_names[i]);
741 } 720 }
721 seq_puts(m, "\n");
742 722
743 r += sprintf(buf + r, "\n"); 723 return 0;
744 WARN_ON(r >= len + 2);
745
746 r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
747
748 kfree(buf);
749
750 return r;
751} 724}
752 725
753static ssize_t 726static ssize_t
@@ -792,10 +765,17 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
792 return cnt; 765 return cnt;
793} 766}
794 767
768static int sched_feat_open(struct inode *inode, struct file *filp)
769{
770 return single_open(filp, sched_feat_show, NULL);
771}
772
795static struct file_operations sched_feat_fops = { 773static struct file_operations sched_feat_fops = {
796 .open = sched_feat_open, 774 .open = sched_feat_open,
797 .read = sched_feat_read, 775 .write = sched_feat_write,
798 .write = sched_feat_write, 776 .read = seq_read,
777 .llseek = seq_lseek,
778 .release = single_release,
799}; 779};
800 780
801static __init int sched_init_debug(void) 781static __init int sched_init_debug(void)
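[Editor's note] The hunk above drops the hand-rolled kmalloc/sprintf/simple_read_from_buffer reader for the sched_features debugfs file and switches to the seq_file single_open() helpers, which take care of buffering, offsets and partial reads. A minimal sketch of that pattern as a self-contained debugfs module (kernel C; everything except the seq_file/debugfs API names is made up):

#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static struct dentry *my_dentry;

/* The show callback prints the whole file into the seq_file; the core
 * handles read offsets and short reads that the old code did by hand. */
static int my_show(struct seq_file *m, void *v)
{
	seq_printf(m, "feature_a %s\n", "on");	/* made-up content */
	return 0;
}

static int my_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, my_show, NULL);
}

static const struct file_operations my_fops = {
	.open    = my_open,
	.read    = seq_read,		/* provided by seq_file */
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init my_init(void)
{
	my_dentry = debugfs_create_file("seqfile_demo", 0444, NULL, NULL, &my_fops);
	return 0;
}

static void __exit my_exit(void)
{
	debugfs_remove(my_dentry);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");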
@@ -1480,27 +1460,13 @@ static void
1480update_group_shares_cpu(struct task_group *tg, int cpu, 1460update_group_shares_cpu(struct task_group *tg, int cpu,
1481 unsigned long sd_shares, unsigned long sd_rq_weight) 1461 unsigned long sd_shares, unsigned long sd_rq_weight)
1482{ 1462{
1483 int boost = 0;
1484 unsigned long shares; 1463 unsigned long shares;
1485 unsigned long rq_weight; 1464 unsigned long rq_weight;
1486 1465
1487 if (!tg->se[cpu]) 1466 if (!tg->se[cpu])
1488 return; 1467 return;
1489 1468
1490 rq_weight = tg->cfs_rq[cpu]->load.weight; 1469 rq_weight = tg->cfs_rq[cpu]->rq_weight;
1491
1492 /*
1493 * If there are currently no tasks on the cpu pretend there is one of
1494 * average load so that when a new task gets to run here it will not
1495 * get delayed by group starvation.
1496 */
1497 if (!rq_weight) {
1498 boost = 1;
1499 rq_weight = NICE_0_LOAD;
1500 }
1501
1502 if (unlikely(rq_weight > sd_rq_weight))
1503 rq_weight = sd_rq_weight;
1504 1470
1505 /* 1471 /*
1506 * \Sum shares * rq_weight 1472 * \Sum shares * rq_weight
@@ -1508,7 +1474,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
1508 * \Sum rq_weight 1474 * \Sum rq_weight
1509 * 1475 *
1510 */ 1476 */
1511 shares = (sd_shares * rq_weight) / (sd_rq_weight + 1); 1477 shares = (sd_shares * rq_weight) / sd_rq_weight;
1512 shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES); 1478 shares = clamp_t(unsigned long, shares, MIN_SHARES, MAX_SHARES);
1513 1479
1514 if (abs(shares - tg->se[cpu]->load.weight) > 1480 if (abs(shares - tg->se[cpu]->load.weight) >
@@ -1517,11 +1483,7 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
1517 unsigned long flags; 1483 unsigned long flags;
1518 1484
1519 spin_lock_irqsave(&rq->lock, flags); 1485 spin_lock_irqsave(&rq->lock, flags);
1520 /* 1486 tg->cfs_rq[cpu]->shares = shares;
1521 * record the actual number of shares, not the boosted amount.
1522 */
1523 tg->cfs_rq[cpu]->shares = boost ? 0 : shares;
1524 tg->cfs_rq[cpu]->rq_weight = rq_weight;
1525 1487
1526 __set_se_shares(tg->se[cpu], shares); 1488 __set_se_shares(tg->se[cpu], shares);
1527 spin_unlock_irqrestore(&rq->lock, flags); 1489 spin_unlock_irqrestore(&rq->lock, flags);
@@ -1535,13 +1497,23 @@ update_group_shares_cpu(struct task_group *tg, int cpu,
1535 */ 1497 */
1536static int tg_shares_up(struct task_group *tg, void *data) 1498static int tg_shares_up(struct task_group *tg, void *data)
1537{ 1499{
1538 unsigned long rq_weight = 0; 1500 unsigned long weight, rq_weight = 0;
1539 unsigned long shares = 0; 1501 unsigned long shares = 0;
1540 struct sched_domain *sd = data; 1502 struct sched_domain *sd = data;
1541 int i; 1503 int i;
1542 1504
1543 for_each_cpu_mask(i, sd->span) { 1505 for_each_cpu(i, sched_domain_span(sd)) {
1544 rq_weight += tg->cfs_rq[i]->load.weight; 1506 /*
1507 * If there are currently no tasks on the cpu pretend there
1508 * is one of average load so that when a new task gets to
1509 * run here it will not get delayed by group starvation.
1510 */
1511 weight = tg->cfs_rq[i]->load.weight;
1512 if (!weight)
1513 weight = NICE_0_LOAD;
1514
1515 tg->cfs_rq[i]->rq_weight = weight;
1516 rq_weight += weight;
1545 shares += tg->cfs_rq[i]->shares; 1517 shares += tg->cfs_rq[i]->shares;
1546 } 1518 }
1547 1519
@@ -1551,10 +1523,7 @@ static int tg_shares_up(struct task_group *tg, void *data)
1551 if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE)) 1523 if (!sd->parent || !(sd->parent->flags & SD_LOAD_BALANCE))
1552 shares = tg->shares; 1524 shares = tg->shares;
1553 1525
1554 if (!rq_weight) 1526 for_each_cpu(i, sched_domain_span(sd))
1555 rq_weight = cpus_weight(sd->span) * NICE_0_LOAD;
1556
1557 for_each_cpu_mask(i, sd->span)
1558 update_group_shares_cpu(tg, i, shares, rq_weight); 1527 update_group_shares_cpu(tg, i, shares, rq_weight);
1559 1528
1560 return 0; 1529 return 0;
@@ -2085,15 +2054,17 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
2085 int i; 2054 int i;
2086 2055
2087 /* Skip over this group if it has no CPUs allowed */ 2056 /* Skip over this group if it has no CPUs allowed */
2088 if (!cpus_intersects(group->cpumask, p->cpus_allowed)) 2057 if (!cpumask_intersects(sched_group_cpus(group),
2058 &p->cpus_allowed))
2089 continue; 2059 continue;
2090 2060
2091 local_group = cpu_isset(this_cpu, group->cpumask); 2061 local_group = cpumask_test_cpu(this_cpu,
2062 sched_group_cpus(group));
2092 2063
2093 /* Tally up the load of all CPUs in the group */ 2064 /* Tally up the load of all CPUs in the group */
2094 avg_load = 0; 2065 avg_load = 0;
2095 2066
2096 for_each_cpu_mask_nr(i, group->cpumask) { 2067 for_each_cpu(i, sched_group_cpus(group)) {
2097 /* Bias balancing toward cpus of our domain */ 2068 /* Bias balancing toward cpus of our domain */
2098 if (local_group) 2069 if (local_group)
2099 load = source_load(i, load_idx); 2070 load = source_load(i, load_idx);
@@ -2125,17 +2096,14 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
2125 * find_idlest_cpu - find the idlest cpu among the cpus in group. 2096 * find_idlest_cpu - find the idlest cpu among the cpus in group.
2126 */ 2097 */
2127static int 2098static int
2128find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu, 2099find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
2129 cpumask_t *tmp)
2130{ 2100{
2131 unsigned long load, min_load = ULONG_MAX; 2101 unsigned long load, min_load = ULONG_MAX;
2132 int idlest = -1; 2102 int idlest = -1;
2133 int i; 2103 int i;
2134 2104
2135 /* Traverse only the allowed CPUs */ 2105 /* Traverse only the allowed CPUs */
2136 cpus_and(*tmp, group->cpumask, p->cpus_allowed); 2106 for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
2137
2138 for_each_cpu_mask_nr(i, *tmp) {
2139 load = weighted_cpuload(i); 2107 load = weighted_cpuload(i);
2140 2108
2141 if (load < min_load || (load == min_load && i == this_cpu)) { 2109 if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -2177,7 +2145,6 @@ static int sched_balance_self(int cpu, int flag)
2177 update_shares(sd); 2145 update_shares(sd);
2178 2146
2179 while (sd) { 2147 while (sd) {
2180 cpumask_t span, tmpmask;
2181 struct sched_group *group; 2148 struct sched_group *group;
2182 int new_cpu, weight; 2149 int new_cpu, weight;
2183 2150
@@ -2186,14 +2153,13 @@ static int sched_balance_self(int cpu, int flag)
2186 continue; 2153 continue;
2187 } 2154 }
2188 2155
2189 span = sd->span;
2190 group = find_idlest_group(sd, t, cpu); 2156 group = find_idlest_group(sd, t, cpu);
2191 if (!group) { 2157 if (!group) {
2192 sd = sd->child; 2158 sd = sd->child;
2193 continue; 2159 continue;
2194 } 2160 }
2195 2161
2196 new_cpu = find_idlest_cpu(group, t, cpu, &tmpmask); 2162 new_cpu = find_idlest_cpu(group, t, cpu);
2197 if (new_cpu == -1 || new_cpu == cpu) { 2163 if (new_cpu == -1 || new_cpu == cpu) {
2198 /* Now try balancing at a lower domain level of cpu */ 2164 /* Now try balancing at a lower domain level of cpu */
2199 sd = sd->child; 2165 sd = sd->child;
@@ -2202,10 +2168,10 @@ static int sched_balance_self(int cpu, int flag)
2202 2168
2203 /* Now try balancing at a lower domain level of new_cpu */ 2169 /* Now try balancing at a lower domain level of new_cpu */
2204 cpu = new_cpu; 2170 cpu = new_cpu;
2171 weight = cpumask_weight(sched_domain_span(sd));
2205 sd = NULL; 2172 sd = NULL;
2206 weight = cpus_weight(span);
2207 for_each_domain(cpu, tmp) { 2173 for_each_domain(cpu, tmp) {
2208 if (weight <= cpus_weight(tmp->span)) 2174 if (weight <= cpumask_weight(sched_domain_span(tmp)))
2209 break; 2175 break;
2210 if (tmp->flags & flag) 2176 if (tmp->flags & flag)
2211 sd = tmp; 2177 sd = tmp;
@@ -2250,7 +2216,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2250 cpu = task_cpu(p); 2216 cpu = task_cpu(p);
2251 2217
2252 for_each_domain(this_cpu, sd) { 2218 for_each_domain(this_cpu, sd) {
2253 if (cpu_isset(cpu, sd->span)) { 2219 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2254 update_shares(sd); 2220 update_shares(sd);
2255 break; 2221 break;
2256 } 2222 }
@@ -2298,7 +2264,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
2298 else { 2264 else {
2299 struct sched_domain *sd; 2265 struct sched_domain *sd;
2300 for_each_domain(this_cpu, sd) { 2266 for_each_domain(this_cpu, sd) {
2301 if (cpu_isset(cpu, sd->span)) { 2267 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2302 schedstat_inc(sd, ttwu_wake_remote); 2268 schedstat_inc(sd, ttwu_wake_remote);
2303 break; 2269 break;
2304 } 2270 }
@@ -2844,7 +2810,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest)
2844 return ret; 2810 return ret;
2845} 2811}
2846 2812
2847static void double_unlock_balance(struct rq *this_rq, struct rq *busiest) 2813static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
2848 __releases(busiest->lock) 2814 __releases(busiest->lock)
2849{ 2815{
2850 spin_unlock(&busiest->lock); 2816 spin_unlock(&busiest->lock);
@@ -2864,7 +2830,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
2864 struct rq *rq; 2830 struct rq *rq;
2865 2831
2866 rq = task_rq_lock(p, &flags); 2832 rq = task_rq_lock(p, &flags);
2867 if (!cpu_isset(dest_cpu, p->cpus_allowed) 2833 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
2868 || unlikely(!cpu_active(dest_cpu))) 2834 || unlikely(!cpu_active(dest_cpu)))
2869 goto out; 2835 goto out;
2870 2836
@@ -2930,7 +2896,7 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
2930 * 2) cannot be migrated to this CPU due to cpus_allowed, or 2896 * 2) cannot be migrated to this CPU due to cpus_allowed, or
2931 * 3) are cache-hot on their current CPU. 2897 * 3) are cache-hot on their current CPU.
2932 */ 2898 */
2933 if (!cpu_isset(this_cpu, p->cpus_allowed)) { 2899 if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
2934 schedstat_inc(p, se.nr_failed_migrations_affine); 2900 schedstat_inc(p, se.nr_failed_migrations_affine);
2935 return 0; 2901 return 0;
2936 } 2902 }
@@ -3105,7 +3071,7 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
3105static struct sched_group * 3071static struct sched_group *
3106find_busiest_group(struct sched_domain *sd, int this_cpu, 3072find_busiest_group(struct sched_domain *sd, int this_cpu,
3107 unsigned long *imbalance, enum cpu_idle_type idle, 3073 unsigned long *imbalance, enum cpu_idle_type idle,
3108 int *sd_idle, const cpumask_t *cpus, int *balance) 3074 int *sd_idle, const struct cpumask *cpus, int *balance)
3109{ 3075{
3110 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; 3076 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
3111 unsigned long max_load, avg_load, total_load, this_load, total_pwr; 3077 unsigned long max_load, avg_load, total_load, this_load, total_pwr;
@@ -3141,10 +3107,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3141 unsigned long sum_avg_load_per_task; 3107 unsigned long sum_avg_load_per_task;
3142 unsigned long avg_load_per_task; 3108 unsigned long avg_load_per_task;
3143 3109
3144 local_group = cpu_isset(this_cpu, group->cpumask); 3110 local_group = cpumask_test_cpu(this_cpu,
3111 sched_group_cpus(group));
3145 3112
3146 if (local_group) 3113 if (local_group)
3147 balance_cpu = first_cpu(group->cpumask); 3114 balance_cpu = cpumask_first(sched_group_cpus(group));
3148 3115
3149 /* Tally up the load of all CPUs in the group */ 3116 /* Tally up the load of all CPUs in the group */
3150 sum_weighted_load = sum_nr_running = avg_load = 0; 3117 sum_weighted_load = sum_nr_running = avg_load = 0;
@@ -3153,13 +3120,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3153 max_cpu_load = 0; 3120 max_cpu_load = 0;
3154 min_cpu_load = ~0UL; 3121 min_cpu_load = ~0UL;
3155 3122
3156 for_each_cpu_mask_nr(i, group->cpumask) { 3123 for_each_cpu_and(i, sched_group_cpus(group), cpus) {
3157 struct rq *rq; 3124 struct rq *rq = cpu_rq(i);
3158
3159 if (!cpu_isset(i, *cpus))
3160 continue;
3161
3162 rq = cpu_rq(i);
3163 3125
3164 if (*sd_idle && rq->nr_running) 3126 if (*sd_idle && rq->nr_running)
3165 *sd_idle = 0; 3127 *sd_idle = 0;
@@ -3270,8 +3232,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3270 */ 3232 */
3271 if ((sum_nr_running < min_nr_running) || 3233 if ((sum_nr_running < min_nr_running) ||
3272 (sum_nr_running == min_nr_running && 3234 (sum_nr_running == min_nr_running &&
3273 first_cpu(group->cpumask) < 3235 cpumask_first(sched_group_cpus(group)) <
3274 first_cpu(group_min->cpumask))) { 3236 cpumask_first(sched_group_cpus(group_min)))) {
3275 group_min = group; 3237 group_min = group;
3276 min_nr_running = sum_nr_running; 3238 min_nr_running = sum_nr_running;
3277 min_load_per_task = sum_weighted_load / 3239 min_load_per_task = sum_weighted_load /
@@ -3286,8 +3248,8 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3286 if (sum_nr_running <= group_capacity - 1) { 3248 if (sum_nr_running <= group_capacity - 1) {
3287 if (sum_nr_running > leader_nr_running || 3249 if (sum_nr_running > leader_nr_running ||
3288 (sum_nr_running == leader_nr_running && 3250 (sum_nr_running == leader_nr_running &&
3289 first_cpu(group->cpumask) > 3251 cpumask_first(sched_group_cpus(group)) >
3290 first_cpu(group_leader->cpumask))) { 3252 cpumask_first(sched_group_cpus(group_leader)))) {
3291 group_leader = group; 3253 group_leader = group;
3292 leader_nr_running = sum_nr_running; 3254 leader_nr_running = sum_nr_running;
3293 } 3255 }
@@ -3426,16 +3388,16 @@ ret:
3426 */ 3388 */
3427static struct rq * 3389static struct rq *
3428find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, 3390find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
3429 unsigned long imbalance, const cpumask_t *cpus) 3391 unsigned long imbalance, const struct cpumask *cpus)
3430{ 3392{
3431 struct rq *busiest = NULL, *rq; 3393 struct rq *busiest = NULL, *rq;
3432 unsigned long max_load = 0; 3394 unsigned long max_load = 0;
3433 int i; 3395 int i;
3434 3396
3435 for_each_cpu_mask_nr(i, group->cpumask) { 3397 for_each_cpu(i, sched_group_cpus(group)) {
3436 unsigned long wl; 3398 unsigned long wl;
3437 3399
3438 if (!cpu_isset(i, *cpus)) 3400 if (!cpumask_test_cpu(i, cpus))
3439 continue; 3401 continue;
3440 3402
3441 rq = cpu_rq(i); 3403 rq = cpu_rq(i);
@@ -3465,7 +3427,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
3465 */ 3427 */
3466static int load_balance(int this_cpu, struct rq *this_rq, 3428static int load_balance(int this_cpu, struct rq *this_rq,
3467 struct sched_domain *sd, enum cpu_idle_type idle, 3429 struct sched_domain *sd, enum cpu_idle_type idle,
3468 int *balance, cpumask_t *cpus) 3430 int *balance, struct cpumask *cpus)
3469{ 3431{
3470 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; 3432 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
3471 struct sched_group *group; 3433 struct sched_group *group;
@@ -3473,7 +3435,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
3473 struct rq *busiest; 3435 struct rq *busiest;
3474 unsigned long flags; 3436 unsigned long flags;
3475 3437
3476 cpus_setall(*cpus); 3438 cpumask_setall(cpus);
3477 3439
3478 /* 3440 /*
3479 * When power savings policy is enabled for the parent domain, idle 3441 * When power savings policy is enabled for the parent domain, idle
@@ -3533,8 +3495,8 @@ redo:
3533 3495
3534 /* All tasks on this runqueue were pinned by CPU affinity */ 3496 /* All tasks on this runqueue were pinned by CPU affinity */
3535 if (unlikely(all_pinned)) { 3497 if (unlikely(all_pinned)) {
3536 cpu_clear(cpu_of(busiest), *cpus); 3498 cpumask_clear_cpu(cpu_of(busiest), cpus);
3537 if (!cpus_empty(*cpus)) 3499 if (!cpumask_empty(cpus))
3538 goto redo; 3500 goto redo;
3539 goto out_balanced; 3501 goto out_balanced;
3540 } 3502 }
@@ -3551,7 +3513,8 @@ redo:
3551 /* don't kick the migration_thread, if the curr 3513 /* don't kick the migration_thread, if the curr
3552 * task on busiest cpu can't be moved to this_cpu 3514 * task on busiest cpu can't be moved to this_cpu
3553 */ 3515 */
3554 if (!cpu_isset(this_cpu, busiest->curr->cpus_allowed)) { 3516 if (!cpumask_test_cpu(this_cpu,
3517 &busiest->curr->cpus_allowed)) {
3555 spin_unlock_irqrestore(&busiest->lock, flags); 3518 spin_unlock_irqrestore(&busiest->lock, flags);
3556 all_pinned = 1; 3519 all_pinned = 1;
3557 goto out_one_pinned; 3520 goto out_one_pinned;
@@ -3626,7 +3589,7 @@ out:
3626 */ 3589 */
3627static int 3590static int
3628load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, 3591load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
3629 cpumask_t *cpus) 3592 struct cpumask *cpus)
3630{ 3593{
3631 struct sched_group *group; 3594 struct sched_group *group;
3632 struct rq *busiest = NULL; 3595 struct rq *busiest = NULL;
@@ -3635,7 +3598,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
3635 int sd_idle = 0; 3598 int sd_idle = 0;
3636 int all_pinned = 0; 3599 int all_pinned = 0;
3637 3600
3638 cpus_setall(*cpus); 3601 cpumask_setall(cpus);
3639 3602
3640 /* 3603 /*
3641 * When power savings policy is enabled for the parent domain, idle 3604 * When power savings policy is enabled for the parent domain, idle
@@ -3679,8 +3642,8 @@ redo:
3679 double_unlock_balance(this_rq, busiest); 3642 double_unlock_balance(this_rq, busiest);
3680 3643
3681 if (unlikely(all_pinned)) { 3644 if (unlikely(all_pinned)) {
3682 cpu_clear(cpu_of(busiest), *cpus); 3645 cpumask_clear_cpu(cpu_of(busiest), cpus);
3683 if (!cpus_empty(*cpus)) 3646 if (!cpumask_empty(cpus))
3684 goto redo; 3647 goto redo;
3685 } 3648 }
3686 } 3649 }
@@ -3715,7 +3678,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3715 struct sched_domain *sd; 3678 struct sched_domain *sd;
3716 int pulled_task = -1; 3679 int pulled_task = -1;
3717 unsigned long next_balance = jiffies + HZ; 3680 unsigned long next_balance = jiffies + HZ;
3718 cpumask_t tmpmask; 3681 cpumask_var_t tmpmask;
3682
3683 if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
3684 return;
3719 3685
3720 for_each_domain(this_cpu, sd) { 3686 for_each_domain(this_cpu, sd) {
3721 unsigned long interval; 3687 unsigned long interval;
@@ -3726,7 +3692,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3726 if (sd->flags & SD_BALANCE_NEWIDLE) 3692 if (sd->flags & SD_BALANCE_NEWIDLE)
3727 /* If we've pulled tasks over stop searching: */ 3693 /* If we've pulled tasks over stop searching: */
3728 pulled_task = load_balance_newidle(this_cpu, this_rq, 3694 pulled_task = load_balance_newidle(this_cpu, this_rq,
3729 sd, &tmpmask); 3695 sd, tmpmask);
3730 3696
3731 interval = msecs_to_jiffies(sd->balance_interval); 3697 interval = msecs_to_jiffies(sd->balance_interval);
3732 if (time_after(next_balance, sd->last_balance + interval)) 3698 if (time_after(next_balance, sd->last_balance + interval))
@@ -3741,6 +3707,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3741 */ 3707 */
3742 this_rq->next_balance = next_balance; 3708 this_rq->next_balance = next_balance;
3743 } 3709 }
3710 free_cpumask_var(tmpmask);
3744} 3711}
3745 3712
3746/* 3713/*
@@ -3778,7 +3745,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3778 /* Search for an sd spanning us and the target CPU. */ 3745 /* Search for an sd spanning us and the target CPU. */
3779 for_each_domain(target_cpu, sd) { 3746 for_each_domain(target_cpu, sd) {
3780 if ((sd->flags & SD_LOAD_BALANCE) && 3747 if ((sd->flags & SD_LOAD_BALANCE) &&
3781 cpu_isset(busiest_cpu, sd->span)) 3748 cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
3782 break; 3749 break;
3783 } 3750 }
3784 3751
@@ -3797,10 +3764,9 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3797#ifdef CONFIG_NO_HZ 3764#ifdef CONFIG_NO_HZ
3798static struct { 3765static struct {
3799 atomic_t load_balancer; 3766 atomic_t load_balancer;
3800 cpumask_t cpu_mask; 3767 cpumask_var_t cpu_mask;
3801} nohz ____cacheline_aligned = { 3768} nohz ____cacheline_aligned = {
3802 .load_balancer = ATOMIC_INIT(-1), 3769 .load_balancer = ATOMIC_INIT(-1),
3803 .cpu_mask = CPU_MASK_NONE,
3804}; 3770};
3805 3771
3806/* 3772/*
@@ -3828,7 +3794,7 @@ int select_nohz_load_balancer(int stop_tick)
3828 int cpu = smp_processor_id(); 3794 int cpu = smp_processor_id();
3829 3795
3830 if (stop_tick) { 3796 if (stop_tick) {
3831 cpu_set(cpu, nohz.cpu_mask); 3797 cpumask_set_cpu(cpu, nohz.cpu_mask);
3832 cpu_rq(cpu)->in_nohz_recently = 1; 3798 cpu_rq(cpu)->in_nohz_recently = 1;
3833 3799
3834 /* 3800 /*
@@ -3842,7 +3808,7 @@ int select_nohz_load_balancer(int stop_tick)
3842 } 3808 }
3843 3809
3844 /* time for ilb owner also to sleep */ 3810 /* time for ilb owner also to sleep */
3845 if (cpus_weight(nohz.cpu_mask) == num_online_cpus()) { 3811 if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
3846 if (atomic_read(&nohz.load_balancer) == cpu) 3812 if (atomic_read(&nohz.load_balancer) == cpu)
3847 atomic_set(&nohz.load_balancer, -1); 3813 atomic_set(&nohz.load_balancer, -1);
3848 return 0; 3814 return 0;
@@ -3855,10 +3821,10 @@ int select_nohz_load_balancer(int stop_tick)
3855 } else if (atomic_read(&nohz.load_balancer) == cpu) 3821 } else if (atomic_read(&nohz.load_balancer) == cpu)
3856 return 1; 3822 return 1;
3857 } else { 3823 } else {
3858 if (!cpu_isset(cpu, nohz.cpu_mask)) 3824 if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
3859 return 0; 3825 return 0;
3860 3826
3861 cpu_clear(cpu, nohz.cpu_mask); 3827 cpumask_clear_cpu(cpu, nohz.cpu_mask);
3862 3828
3863 if (atomic_read(&nohz.load_balancer) == cpu) 3829 if (atomic_read(&nohz.load_balancer) == cpu)
3864 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu) 3830 if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
@@ -3886,7 +3852,11 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3886 unsigned long next_balance = jiffies + 60*HZ; 3852 unsigned long next_balance = jiffies + 60*HZ;
3887 int update_next_balance = 0; 3853 int update_next_balance = 0;
3888 int need_serialize; 3854 int need_serialize;
3889 cpumask_t tmp; 3855 cpumask_var_t tmp;
3856
3857 /* Fails alloc? Rebalancing probably not a priority right now. */
3858 if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
3859 return;
3890 3860
3891 for_each_domain(cpu, sd) { 3861 for_each_domain(cpu, sd) {
3892 if (!(sd->flags & SD_LOAD_BALANCE)) 3862 if (!(sd->flags & SD_LOAD_BALANCE))
@@ -3911,7 +3881,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
3911 } 3881 }
3912 3882
3913 if (time_after_eq(jiffies, sd->last_balance + interval)) { 3883 if (time_after_eq(jiffies, sd->last_balance + interval)) {
3914 if (load_balance(cpu, rq, sd, idle, &balance, &tmp)) { 3884 if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
3915 /* 3885 /*
3916 * We've pulled tasks over so either we're no 3886 * We've pulled tasks over so either we're no
3917 * longer idle, or one of our SMT siblings is 3887 * longer idle, or one of our SMT siblings is
@@ -3945,6 +3915,8 @@ out:
3945 */ 3915 */
3946 if (likely(update_next_balance)) 3916 if (likely(update_next_balance))
3947 rq->next_balance = next_balance; 3917 rq->next_balance = next_balance;
3918
3919 free_cpumask_var(tmp);
3948} 3920}
3949 3921
3950/* 3922/*
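[Editor's note] Several of the sched.c hunks above follow the same recipe for moving cpumask_t variables off the stack: declare a cpumask_var_t, allocate it with alloc_cpumask_var() (bailing out gracefully on failure, as the "Fails alloc?" comment notes), operate on it through the cpumask_* accessors, and free it with free_cpumask_var() on every exit path. A condensed sketch of that pattern (kernel C; the work in the middle is a placeholder):

#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/errno.h>

/* Same shape as the rebalance_domains()/idle_balance() conversions above:
 * cpumask_var_t is a pointer when CONFIG_CPUMASK_OFFSTACK=y, so it must be
 * allocated and freed; with the option off the calls compile away. */
static int walk_online_cpus(void)
{
	cpumask_var_t tmp;
	int cpu, count = 0;

	if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(tmp, cpu_online_mask);
	for_each_cpu(cpu, tmp)
		count++;			/* placeholder for real per-cpu work */

	free_cpumask_var(tmp);
	return count;
}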
@@ -3969,12 +3941,13 @@ static void run_rebalance_domains(struct softirq_action *h)
3969 */ 3941 */
3970 if (this_rq->idle_at_tick && 3942 if (this_rq->idle_at_tick &&
3971 atomic_read(&nohz.load_balancer) == this_cpu) { 3943 atomic_read(&nohz.load_balancer) == this_cpu) {
3972 cpumask_t cpus = nohz.cpu_mask;
3973 struct rq *rq; 3944 struct rq *rq;
3974 int balance_cpu; 3945 int balance_cpu;
3975 3946
3976 cpu_clear(this_cpu, cpus); 3947 for_each_cpu(balance_cpu, nohz.cpu_mask) {
3977 for_each_cpu_mask_nr(balance_cpu, cpus) { 3948 if (balance_cpu == this_cpu)
3949 continue;
3950
3978 /* 3951 /*
3979 * If this cpu gets work to do, stop the load balancing 3952 * If this cpu gets work to do, stop the load balancing
3980 * work being done for other cpus. Next load 3953 * work being done for other cpus. Next load
@@ -4012,7 +3985,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
4012 rq->in_nohz_recently = 0; 3985 rq->in_nohz_recently = 0;
4013 3986
4014 if (atomic_read(&nohz.load_balancer) == cpu) { 3987 if (atomic_read(&nohz.load_balancer) == cpu) {
4015 cpu_clear(cpu, nohz.cpu_mask); 3988 cpumask_clear_cpu(cpu, nohz.cpu_mask);
4016 atomic_set(&nohz.load_balancer, -1); 3989 atomic_set(&nohz.load_balancer, -1);
4017 } 3990 }
4018 3991
@@ -4025,7 +3998,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
4025 * TBD: Traverse the sched domains and nominate 3998 * TBD: Traverse the sched domains and nominate
4026 * the nearest cpu in the nohz.cpu_mask. 3999 * the nearest cpu in the nohz.cpu_mask.
4027 */ 4000 */
4028 int ilb = first_cpu(nohz.cpu_mask); 4001 int ilb = cpumask_first(nohz.cpu_mask);
4029 4002
4030 if (ilb < nr_cpu_ids) 4003 if (ilb < nr_cpu_ids)
4031 resched_cpu(ilb); 4004 resched_cpu(ilb);
@@ -4037,7 +4010,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
4037 * cpus with ticks stopped, is it time for that to stop? 4010 * cpus with ticks stopped, is it time for that to stop?
4038 */ 4011 */
4039 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu && 4012 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) == cpu &&
4040 cpus_weight(nohz.cpu_mask) == num_online_cpus()) { 4013 cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
4041 resched_cpu(cpu); 4014 resched_cpu(cpu);
4042 return; 4015 return;
4043 } 4016 }
@@ -4047,7 +4020,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
4047 * someone else, then no need raise the SCHED_SOFTIRQ 4020 * someone else, then no need raise the SCHED_SOFTIRQ
4048 */ 4021 */
4049 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu && 4022 if (rq->idle_at_tick && atomic_read(&nohz.load_balancer) != cpu &&
4050 cpu_isset(cpu, nohz.cpu_mask)) 4023 cpumask_test_cpu(cpu, nohz.cpu_mask))
4051 return; 4024 return;
4052#endif 4025#endif
4053 if (time_after_eq(jiffies, rq->next_balance)) 4026 if (time_after_eq(jiffies, rq->next_balance))
@@ -4209,7 +4182,6 @@ void account_steal_time(struct task_struct *p, cputime_t steal)
4209 4182
4210 if (p == rq->idle) { 4183 if (p == rq->idle) {
4211 p->stime = cputime_add(p->stime, steal); 4184 p->stime = cputime_add(p->stime, steal);
4212 account_group_system_time(p, steal);
4213 if (atomic_read(&rq->nr_iowait) > 0) 4185 if (atomic_read(&rq->nr_iowait) > 0)
4214 cpustat->iowait = cputime64_add(cpustat->iowait, tmp); 4186 cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
4215 else 4187 else
@@ -4345,7 +4317,7 @@ void __kprobes sub_preempt_count(int val)
4345 /* 4317 /*
4346 * Underflow? 4318 * Underflow?
4347 */ 4319 */
4348 if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) 4320 if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked())))
4349 return; 4321 return;
4350 /* 4322 /*
4351 * Is the spinlock portion underflowing? 4323 * Is the spinlock portion underflowing?
@@ -5406,10 +5378,9 @@ out_unlock:
5406 return retval; 5378 return retval;
5407} 5379}
5408 5380
5409long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) 5381long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
5410{ 5382{
5411 cpumask_t cpus_allowed; 5383 cpumask_var_t cpus_allowed, new_mask;
5412 cpumask_t new_mask = *in_mask;
5413 struct task_struct *p; 5384 struct task_struct *p;
5414 int retval; 5385 int retval;
5415 5386
@@ -5431,6 +5402,14 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
5431 get_task_struct(p); 5402 get_task_struct(p);
5432 read_unlock(&tasklist_lock); 5403 read_unlock(&tasklist_lock);
5433 5404
5405 if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
5406 retval = -ENOMEM;
5407 goto out_put_task;
5408 }
5409 if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) {
5410 retval = -ENOMEM;
5411 goto out_free_cpus_allowed;
5412 }
5434 retval = -EPERM; 5413 retval = -EPERM;
5435 if ((current->euid != p->euid) && (current->euid != p->uid) && 5414 if ((current->euid != p->euid) && (current->euid != p->uid) &&
5436 !capable(CAP_SYS_NICE)) 5415 !capable(CAP_SYS_NICE))
@@ -5440,37 +5419,41 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask)
5440 if (retval) 5419 if (retval)
5441 goto out_unlock; 5420 goto out_unlock;
5442 5421
5443 cpuset_cpus_allowed(p, &cpus_allowed); 5422 cpuset_cpus_allowed(p, cpus_allowed);
5444 cpus_and(new_mask, new_mask, cpus_allowed); 5423 cpumask_and(new_mask, in_mask, cpus_allowed);
5445 again: 5424 again:
5446 retval = set_cpus_allowed_ptr(p, &new_mask); 5425 retval = set_cpus_allowed_ptr(p, new_mask);
5447 5426
5448 if (!retval) { 5427 if (!retval) {
5449 cpuset_cpus_allowed(p, &cpus_allowed); 5428 cpuset_cpus_allowed(p, cpus_allowed);
5450 if (!cpus_subset(new_mask, cpus_allowed)) { 5429 if (!cpumask_subset(new_mask, cpus_allowed)) {
5451 /* 5430 /*
5452 * We must have raced with a concurrent cpuset 5431 * We must have raced with a concurrent cpuset
5453 * update. Just reset the cpus_allowed to the 5432 * update. Just reset the cpus_allowed to the
5454 * cpuset's cpus_allowed 5433 * cpuset's cpus_allowed
5455 */ 5434 */
5456 new_mask = cpus_allowed; 5435 cpumask_copy(new_mask, cpus_allowed);
5457 goto again; 5436 goto again;
5458 } 5437 }
5459 } 5438 }
5460out_unlock: 5439out_unlock:
5440 free_cpumask_var(new_mask);
5441out_free_cpus_allowed:
5442 free_cpumask_var(cpus_allowed);
5443out_put_task:
5461 put_task_struct(p); 5444 put_task_struct(p);
5462 put_online_cpus(); 5445 put_online_cpus();
5463 return retval; 5446 return retval;
5464} 5447}
5465 5448
5466static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, 5449static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
5467 cpumask_t *new_mask) 5450 struct cpumask *new_mask)
5468{ 5451{
5469 if (len < sizeof(cpumask_t)) { 5452 if (len < cpumask_size())
5470 memset(new_mask, 0, sizeof(cpumask_t)); 5453 cpumask_clear(new_mask);
5471 } else if (len > sizeof(cpumask_t)) { 5454 else if (len > cpumask_size())
5472 len = sizeof(cpumask_t); 5455 len = cpumask_size();
5473 } 5456
5474 return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0; 5457 return copy_from_user(new_mask, user_mask_ptr, len) ? -EFAULT : 0;
5475} 5458}
5476 5459
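
The hunk above moves sched_setaffinity() from two on-stack cpumask_t variables to cpumask_var_t, which must be allocated and freed explicitly when CONFIG_CPUMASK_OFFSTACK is enabled. A minimal sketch of that allocate/use/unwind shape, with hypothetical names and a stand-in for the cpuset lookup, assuming kernel context:

	#include <linux/cpumask.h>
	#include <linux/errno.h>
	#include <linux/gfp.h>

	/* Hypothetical helper showing the allocate/use/unwind shape. */
	static int example_set_affinity(const struct cpumask *in_mask)
	{
		cpumask_var_t cpus_allowed, new_mask;
		int retval = -ENOMEM;

		if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL))
			goto out;
		if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
			goto free_cpus_allowed;

		cpumask_copy(cpus_allowed, cpu_online_mask);	/* stand-in for the cpuset lookup */
		cpumask_and(new_mask, in_mask, cpus_allowed);
		retval = 0;

		free_cpumask_var(new_mask);
	free_cpus_allowed:
		free_cpumask_var(cpus_allowed);
	out:
		return retval;
	}
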
@@ -5483,17 +5466,20 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
5483asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, 5466asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len,
5484 unsigned long __user *user_mask_ptr) 5467 unsigned long __user *user_mask_ptr)
5485{ 5468{
5486 cpumask_t new_mask; 5469 cpumask_var_t new_mask;
5487 int retval; 5470 int retval;
5488 5471
5489 retval = get_user_cpu_mask(user_mask_ptr, len, &new_mask); 5472 if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
5490 if (retval) 5473 return -ENOMEM;
5491 return retval;
5492 5474
5493 return sched_setaffinity(pid, &new_mask); 5475 retval = get_user_cpu_mask(user_mask_ptr, len, new_mask);
5476 if (retval == 0)
5477 retval = sched_setaffinity(pid, new_mask);
5478 free_cpumask_var(new_mask);
5479 return retval;
5494} 5480}
5495 5481
5496long sched_getaffinity(pid_t pid, cpumask_t *mask) 5482long sched_getaffinity(pid_t pid, struct cpumask *mask)
5497{ 5483{
5498 struct task_struct *p; 5484 struct task_struct *p;
5499 int retval; 5485 int retval;
@@ -5510,7 +5496,7 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
5510 if (retval) 5496 if (retval)
5511 goto out_unlock; 5497 goto out_unlock;
5512 5498
5513 cpus_and(*mask, p->cpus_allowed, cpu_online_map); 5499 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
5514 5500
5515out_unlock: 5501out_unlock:
5516 read_unlock(&tasklist_lock); 5502 read_unlock(&tasklist_lock);
@@ -5529,19 +5515,24 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len,
5529 unsigned long __user *user_mask_ptr) 5515 unsigned long __user *user_mask_ptr)
5530{ 5516{
5531 int ret; 5517 int ret;
5532 cpumask_t mask; 5518 cpumask_var_t mask;
5533 5519
5534 if (len < sizeof(cpumask_t)) 5520 if (len < cpumask_size())
5535 return -EINVAL; 5521 return -EINVAL;
5536 5522
5537 ret = sched_getaffinity(pid, &mask); 5523 if (!alloc_cpumask_var(&mask, GFP_KERNEL))
5538 if (ret < 0) 5524 return -ENOMEM;
5539 return ret;
5540 5525
5541 if (copy_to_user(user_mask_ptr, &mask, sizeof(cpumask_t))) 5526 ret = sched_getaffinity(pid, mask);
5542 return -EFAULT; 5527 if (ret == 0) {
5528 if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
5529 ret = -EFAULT;
5530 else
5531 ret = cpumask_size();
5532 }
5533 free_cpumask_var(mask);
5543 5534
5544 return sizeof(cpumask_t); 5535 return ret;
5545} 5536}
5546 5537
5547/** 5538/**
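
In the hunks above, the user copies in sys_sched_setaffinity() and sys_sched_getaffinity() switch from sizeof(cpumask_t) to cpumask_size(), and the success return value of the getter becomes cpumask_size() as well. A sketch of the shape of that copy-out path, assuming kernel context; the helper name is illustrative:

	#include <linux/cpumask.h>
	#include <linux/errno.h>
	#include <linux/uaccess.h>

	/* Hypothetical helper: copy a kernel cpumask out to user space. */
	static long copy_mask_to_user(unsigned long __user *user_mask_ptr,
				      unsigned int len, const struct cpumask *mask)
	{
		if (len < cpumask_size())
			return -EINVAL;
		if (copy_to_user(user_mask_ptr, mask, cpumask_size()))
			return -EFAULT;
		return cpumask_size();	/* the syscall now reports bytes written */
	}
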
@@ -5883,7 +5874,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5883 idle->se.exec_start = sched_clock(); 5874 idle->se.exec_start = sched_clock();
5884 5875
5885 idle->prio = idle->normal_prio = MAX_PRIO; 5876 idle->prio = idle->normal_prio = MAX_PRIO;
5886 idle->cpus_allowed = cpumask_of_cpu(cpu); 5877 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
5887 __set_task_cpu(idle, cpu); 5878 __set_task_cpu(idle, cpu);
5888 5879
5889 rq->curr = rq->idle = idle; 5880 rq->curr = rq->idle = idle;
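
Above, cpumask_of_cpu(), which produced a full cpumask_t by value, gives way to cpumask_of(), which returns a const struct cpumask * to a precomputed single-bit mask, paired with cpumask_copy(). A one-function sketch of the pattern (kernel context assumed; the function name is illustrative):

	#include <linux/cpumask.h>

	/* Illustrative: pin a mask to a single CPU the new way. */
	static void bind_mask_to_cpu(struct cpumask *dst, int cpu)
	{
		/* cpumask_of(cpu) points at a shared, read-only one-bit mask. */
		cpumask_copy(dst, cpumask_of(cpu));
	}
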
@@ -5910,9 +5901,9 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5910 * indicates which cpus entered this state. This is used 5901 * indicates which cpus entered this state. This is used
5911 * in the rcu update to wait only for active cpus. For system 5902 * in the rcu update to wait only for active cpus. For system
5912 * which do not switch off the HZ timer nohz_cpu_mask should 5903 * which do not switch off the HZ timer nohz_cpu_mask should
5913 * always be CPU_MASK_NONE. 5904 * always be CPU_BITS_NONE.
5914 */ 5905 */
5915cpumask_t nohz_cpu_mask = CPU_MASK_NONE; 5906cpumask_var_t nohz_cpu_mask;
5916 5907
5917/* 5908/*
5918 * Increase the granularity value when there are more CPUs, 5909 * Increase the granularity value when there are more CPUs,
@@ -5967,7 +5958,7 @@ static inline void sched_init_granularity(void)
5967 * task must not exit() & deallocate itself prematurely. The 5958 * task must not exit() & deallocate itself prematurely. The
5968 * call is not atomic; no spinlocks may be held. 5959 * call is not atomic; no spinlocks may be held.
5969 */ 5960 */
5970int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask) 5961int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
5971{ 5962{
5972 struct migration_req req; 5963 struct migration_req req;
5973 unsigned long flags; 5964 unsigned long flags;
@@ -5975,13 +5966,13 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
5975 int ret = 0; 5966 int ret = 0;
5976 5967
5977 rq = task_rq_lock(p, &flags); 5968 rq = task_rq_lock(p, &flags);
5978 if (!cpus_intersects(*new_mask, cpu_online_map)) { 5969 if (!cpumask_intersects(new_mask, cpu_online_mask)) {
5979 ret = -EINVAL; 5970 ret = -EINVAL;
5980 goto out; 5971 goto out;
5981 } 5972 }
5982 5973
5983 if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && 5974 if (unlikely((p->flags & PF_THREAD_BOUND) && p != current &&
5984 !cpus_equal(p->cpus_allowed, *new_mask))) { 5975 !cpumask_equal(&p->cpus_allowed, new_mask))) {
5985 ret = -EINVAL; 5976 ret = -EINVAL;
5986 goto out; 5977 goto out;
5987 } 5978 }
@@ -5989,15 +5980,15 @@ int set_cpus_allowed_ptr(struct task_struct *p, const cpumask_t *new_mask)
5989 if (p->sched_class->set_cpus_allowed) 5980 if (p->sched_class->set_cpus_allowed)
5990 p->sched_class->set_cpus_allowed(p, new_mask); 5981 p->sched_class->set_cpus_allowed(p, new_mask);
5991 else { 5982 else {
5992 p->cpus_allowed = *new_mask; 5983 cpumask_copy(&p->cpus_allowed, new_mask);
5993 p->rt.nr_cpus_allowed = cpus_weight(*new_mask); 5984 p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
5994 } 5985 }
5995 5986
5996 /* Can the task run on the task's current CPU? If so, we're done */ 5987 /* Can the task run on the task's current CPU? If so, we're done */
5997 if (cpu_isset(task_cpu(p), *new_mask)) 5988 if (cpumask_test_cpu(task_cpu(p), new_mask))
5998 goto out; 5989 goto out;
5999 5990
6000 if (migrate_task(p, any_online_cpu(*new_mask), &req)) { 5991 if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) {
6001 /* Need help from migration thread: drop lock and wait. */ 5992 /* Need help from migration thread: drop lock and wait. */
6002 task_rq_unlock(rq, &flags); 5993 task_rq_unlock(rq, &flags);
6003 wake_up_process(rq->migration_thread); 5994 wake_up_process(rq->migration_thread);
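
The set_cpus_allowed_ptr() hunks above replace whole-struct assignment and the dereferencing helpers with cpumask_copy(), cpumask_equal(), cpumask_intersects() and cpumask_any_and(). A reduced sketch of the new shape (kernel context assumed; the helper and its arguments are illustrative):

	#include <linux/cpumask.h>
	#include <linux/errno.h>

	/* Illustrative: validate a new mask and pick a migration target. */
	static int validate_and_pick(struct cpumask *dst, const struct cpumask *new_mask)
	{
		if (!cpumask_intersects(new_mask, cpu_online_mask))
			return -EINVAL;

		cpumask_copy(dst, new_mask);

		/* cpumask_any_and() returns >= nr_cpu_ids if the intersection is empty. */
		return cpumask_any_and(cpu_online_mask, new_mask);
	}
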
@@ -6039,7 +6030,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
6039 if (task_cpu(p) != src_cpu) 6030 if (task_cpu(p) != src_cpu)
6040 goto done; 6031 goto done;
6041 /* Affinity changed (again). */ 6032 /* Affinity changed (again). */
6042 if (!cpu_isset(dest_cpu, p->cpus_allowed)) 6033 if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
6043 goto fail; 6034 goto fail;
6044 6035
6045 on_rq = p->se.on_rq; 6036 on_rq = p->se.on_rq;
@@ -6133,54 +6124,46 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
6133 6124
6134/* 6125/*
6135 * Figure out where task on dead CPU should go, use force if necessary. 6126 * Figure out where task on dead CPU should go, use force if necessary.
6136 * NOTE: interrupts should be disabled by the caller
6137 */ 6127 */
6138static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) 6128static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
6139{ 6129{
6140 unsigned long flags;
6141 cpumask_t mask;
6142 struct rq *rq;
6143 int dest_cpu; 6130 int dest_cpu;
6131 /* FIXME: Use cpumask_of_node here. */
6132 cpumask_t _nodemask = node_to_cpumask(cpu_to_node(dead_cpu));
6133 const struct cpumask *nodemask = &_nodemask;
6134
6135again:
6136 /* Look for allowed, online CPU in same node. */
6137 for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask)
6138 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
6139 goto move;
6140
6141 /* Any allowed, online CPU? */
6142 dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask);
6143 if (dest_cpu < nr_cpu_ids)
6144 goto move;
6145
6146 /* No more Mr. Nice Guy. */
6147 if (dest_cpu >= nr_cpu_ids) {
6148 cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
6149 dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed);
6144 6150
6145 do { 6151 /*
6146 /* On same node? */ 6152 * Don't tell them about moving exiting tasks or
6147 mask = node_to_cpumask(cpu_to_node(dead_cpu)); 6153 * kernel threads (both mm NULL), since they never
6148 cpus_and(mask, mask, p->cpus_allowed); 6154 * leave kernel.
6149 dest_cpu = any_online_cpu(mask); 6155 */
6150 6156 if (p->mm && printk_ratelimit()) {
6151 /* On any allowed CPU? */ 6157 printk(KERN_INFO "process %d (%s) no "
6152 if (dest_cpu >= nr_cpu_ids) 6158 "longer affine to cpu%d\n",
6153 dest_cpu = any_online_cpu(p->cpus_allowed); 6159 task_pid_nr(p), p->comm, dead_cpu);
6154
6155 /* No more Mr. Nice Guy. */
6156 if (dest_cpu >= nr_cpu_ids) {
6157 cpumask_t cpus_allowed;
6158
6159 cpuset_cpus_allowed_locked(p, &cpus_allowed);
6160 /*
6161 * Try to stay on the same cpuset, where the
6162 * current cpuset may be a subset of all cpus.
6163 * The cpuset_cpus_allowed_locked() variant of
6164 * cpuset_cpus_allowed() will not block. It must be
6165 * called within calls to cpuset_lock/cpuset_unlock.
6166 */
6167 rq = task_rq_lock(p, &flags);
6168 p->cpus_allowed = cpus_allowed;
6169 dest_cpu = any_online_cpu(p->cpus_allowed);
6170 task_rq_unlock(rq, &flags);
6171
6172 /*
6173 * Don't tell them about moving exiting tasks or
6174 * kernel threads (both mm NULL), since they never
6175 * leave kernel.
6176 */
6177 if (p->mm && printk_ratelimit()) {
6178 printk(KERN_INFO "process %d (%s) no "
6179 "longer affine to cpu%d\n",
6180 task_pid_nr(p), p->comm, dead_cpu);
6181 }
6182 } 6160 }
6183 } while (!__migrate_task_irq(p, dead_cpu, dest_cpu)); 6161 }
6162
6163move:
6164 /* It can have affinity changed while we were choosing. */
6165 if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
6166 goto again;
6184} 6167}
6185 6168
6186/* 6169/*
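
The rewrite above flattens the old do/while retry loop in move_task_off_dead_cpu() into a straight-line search with again/move labels, scanning the dead CPU's node first via for_each_cpu_and(). A simplified sketch of that selection order (hypothetical helper, kernel context assumed):

	#include <linux/cpumask.h>

	/*
	 * Illustrative selection order: prefer an allowed CPU on the given
	 * node, then fall back to any allowed online CPU; a return value
	 * >= nr_cpu_ids means nothing suitable was found.
	 */
	static int pick_dest_cpu(const struct cpumask *nodemask,
				 const struct cpumask *allowed)
	{
		int cpu;

		for_each_cpu_and(cpu, nodemask, cpu_online_mask)
			if (cpumask_test_cpu(cpu, allowed))
				return cpu;

		return cpumask_any_and(allowed, cpu_online_mask);
	}
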
@@ -6192,7 +6175,7 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
6192 */ 6175 */
6193static void migrate_nr_uninterruptible(struct rq *rq_src) 6176static void migrate_nr_uninterruptible(struct rq *rq_src)
6194{ 6177{
6195 struct rq *rq_dest = cpu_rq(any_online_cpu(*CPU_MASK_ALL_PTR)); 6178 struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask));
6196 unsigned long flags; 6179 unsigned long flags;
6197 6180
6198 local_irq_save(flags); 6181 local_irq_save(flags);
@@ -6482,7 +6465,7 @@ static void set_rq_online(struct rq *rq)
6482 if (!rq->online) { 6465 if (!rq->online) {
6483 const struct sched_class *class; 6466 const struct sched_class *class;
6484 6467
6485 cpu_set(rq->cpu, rq->rd->online); 6468 cpumask_set_cpu(rq->cpu, rq->rd->online);
6486 rq->online = 1; 6469 rq->online = 1;
6487 6470
6488 for_each_class(class) { 6471 for_each_class(class) {
@@ -6502,7 +6485,7 @@ static void set_rq_offline(struct rq *rq)
6502 class->rq_offline(rq); 6485 class->rq_offline(rq);
6503 } 6486 }
6504 6487
6505 cpu_clear(rq->cpu, rq->rd->online); 6488 cpumask_clear_cpu(rq->cpu, rq->rd->online);
6506 rq->online = 0; 6489 rq->online = 0;
6507 } 6490 }
6508} 6491}
@@ -6543,7 +6526,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6543 rq = cpu_rq(cpu); 6526 rq = cpu_rq(cpu);
6544 spin_lock_irqsave(&rq->lock, flags); 6527 spin_lock_irqsave(&rq->lock, flags);
6545 if (rq->rd) { 6528 if (rq->rd) {
6546 BUG_ON(!cpu_isset(cpu, rq->rd->span)); 6529 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
6547 6530
6548 set_rq_online(rq); 6531 set_rq_online(rq);
6549 } 6532 }
@@ -6557,7 +6540,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6557 break; 6540 break;
6558 /* Unbind it from offline cpu so it can run. Fall thru. */ 6541 /* Unbind it from offline cpu so it can run. Fall thru. */
6559 kthread_bind(cpu_rq(cpu)->migration_thread, 6542 kthread_bind(cpu_rq(cpu)->migration_thread,
6560 any_online_cpu(cpu_online_map)); 6543 cpumask_any(cpu_online_mask));
6561 kthread_stop(cpu_rq(cpu)->migration_thread); 6544 kthread_stop(cpu_rq(cpu)->migration_thread);
6562 cpu_rq(cpu)->migration_thread = NULL; 6545 cpu_rq(cpu)->migration_thread = NULL;
6563 break; 6546 break;
@@ -6605,7 +6588,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6605 rq = cpu_rq(cpu); 6588 rq = cpu_rq(cpu);
6606 spin_lock_irqsave(&rq->lock, flags); 6589 spin_lock_irqsave(&rq->lock, flags);
6607 if (rq->rd) { 6590 if (rq->rd) {
6608 BUG_ON(!cpu_isset(cpu, rq->rd->span)); 6591 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
6609 set_rq_offline(rq); 6592 set_rq_offline(rq);
6610 } 6593 }
6611 spin_unlock_irqrestore(&rq->lock, flags); 6594 spin_unlock_irqrestore(&rq->lock, flags);
@@ -6643,36 +6626,14 @@ early_initcall(migration_init);
6643 6626
6644#ifdef CONFIG_SCHED_DEBUG 6627#ifdef CONFIG_SCHED_DEBUG
6645 6628
6646static inline const char *sd_level_to_string(enum sched_domain_level lvl)
6647{
6648 switch (lvl) {
6649 case SD_LV_NONE:
6650 return "NONE";
6651 case SD_LV_SIBLING:
6652 return "SIBLING";
6653 case SD_LV_MC:
6654 return "MC";
6655 case SD_LV_CPU:
6656 return "CPU";
6657 case SD_LV_NODE:
6658 return "NODE";
6659 case SD_LV_ALLNODES:
6660 return "ALLNODES";
6661 case SD_LV_MAX:
6662 return "MAX";
6663
6664 }
6665 return "MAX";
6666}
6667
6668static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, 6629static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6669 cpumask_t *groupmask) 6630 struct cpumask *groupmask)
6670{ 6631{
6671 struct sched_group *group = sd->groups; 6632 struct sched_group *group = sd->groups;
6672 char str[256]; 6633 char str[256];
6673 6634
6674 cpulist_scnprintf(str, sizeof(str), sd->span); 6635 cpulist_scnprintf(str, sizeof(str), *sched_domain_span(sd));
6675 cpus_clear(*groupmask); 6636 cpumask_clear(groupmask);
6676 6637
6677 printk(KERN_DEBUG "%*s domain %d: ", level, "", level); 6638 printk(KERN_DEBUG "%*s domain %d: ", level, "", level);
6678 6639
@@ -6684,14 +6645,13 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6684 return -1; 6645 return -1;
6685 } 6646 }
6686 6647
6687 printk(KERN_CONT "span %s level %s\n", 6648 printk(KERN_CONT "span %s level %s\n", str, sd->name);
6688 str, sd_level_to_string(sd->level));
6689 6649
6690 if (!cpu_isset(cpu, sd->span)) { 6650 if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
6691 printk(KERN_ERR "ERROR: domain->span does not contain " 6651 printk(KERN_ERR "ERROR: domain->span does not contain "
6692 "CPU%d\n", cpu); 6652 "CPU%d\n", cpu);
6693 } 6653 }
6694 if (!cpu_isset(cpu, group->cpumask)) { 6654 if (!cpumask_test_cpu(cpu, sched_group_cpus(group))) {
6695 printk(KERN_ERR "ERROR: domain->groups does not contain" 6655 printk(KERN_ERR "ERROR: domain->groups does not contain"
6696 " CPU%d\n", cpu); 6656 " CPU%d\n", cpu);
6697 } 6657 }
@@ -6711,31 +6671,32 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6711 break; 6671 break;
6712 } 6672 }
6713 6673
6714 if (!cpus_weight(group->cpumask)) { 6674 if (!cpumask_weight(sched_group_cpus(group))) {
6715 printk(KERN_CONT "\n"); 6675 printk(KERN_CONT "\n");
6716 printk(KERN_ERR "ERROR: empty group\n"); 6676 printk(KERN_ERR "ERROR: empty group\n");
6717 break; 6677 break;
6718 } 6678 }
6719 6679
6720 if (cpus_intersects(*groupmask, group->cpumask)) { 6680 if (cpumask_intersects(groupmask, sched_group_cpus(group))) {
6721 printk(KERN_CONT "\n"); 6681 printk(KERN_CONT "\n");
6722 printk(KERN_ERR "ERROR: repeated CPUs\n"); 6682 printk(KERN_ERR "ERROR: repeated CPUs\n");
6723 break; 6683 break;
6724 } 6684 }
6725 6685
6726 cpus_or(*groupmask, *groupmask, group->cpumask); 6686 cpumask_or(groupmask, groupmask, sched_group_cpus(group));
6727 6687
6728 cpulist_scnprintf(str, sizeof(str), group->cpumask); 6688 cpulist_scnprintf(str, sizeof(str), *sched_group_cpus(group));
6729 printk(KERN_CONT " %s", str); 6689 printk(KERN_CONT " %s", str);
6730 6690
6731 group = group->next; 6691 group = group->next;
6732 } while (group != sd->groups); 6692 } while (group != sd->groups);
6733 printk(KERN_CONT "\n"); 6693 printk(KERN_CONT "\n");
6734 6694
6735 if (!cpus_equal(sd->span, *groupmask)) 6695 if (!cpumask_equal(sched_domain_span(sd), groupmask))
6736 printk(KERN_ERR "ERROR: groups don't span domain->span\n"); 6696 printk(KERN_ERR "ERROR: groups don't span domain->span\n");
6737 6697
6738 if (sd->parent && !cpus_subset(*groupmask, sd->parent->span)) 6698 if (sd->parent &&
6699 !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
6739 printk(KERN_ERR "ERROR: parent span is not a superset " 6700 printk(KERN_ERR "ERROR: parent span is not a superset "
6740 "of domain->span\n"); 6701 "of domain->span\n");
6741 return 0; 6702 return 0;
@@ -6743,7 +6704,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
6743 6704
6744static void sched_domain_debug(struct sched_domain *sd, int cpu) 6705static void sched_domain_debug(struct sched_domain *sd, int cpu)
6745{ 6706{
6746 cpumask_t *groupmask; 6707 cpumask_var_t groupmask;
6747 int level = 0; 6708 int level = 0;
6748 6709
6749 if (!sd) { 6710 if (!sd) {
@@ -6753,8 +6714,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
6753 6714
6754 printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); 6715 printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu);
6755 6716
6756 groupmask = kmalloc(sizeof(cpumask_t), GFP_KERNEL); 6717 if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) {
6757 if (!groupmask) {
6758 printk(KERN_DEBUG "Cannot load-balance (out of memory)\n"); 6718 printk(KERN_DEBUG "Cannot load-balance (out of memory)\n");
6759 return; 6719 return;
6760 } 6720 }
@@ -6767,7 +6727,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
6767 if (!sd) 6727 if (!sd)
6768 break; 6728 break;
6769 } 6729 }
6770 kfree(groupmask); 6730 free_cpumask_var(groupmask);
6771} 6731}
6772#else /* !CONFIG_SCHED_DEBUG */ 6732#else /* !CONFIG_SCHED_DEBUG */
6773# define sched_domain_debug(sd, cpu) do { } while (0) 6733# define sched_domain_debug(sd, cpu) do { } while (0)
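
Above, the scratch mask in sched_domain_debug() moves from kmalloc(sizeof(cpumask_t)) to alloc_cpumask_var()/free_cpumask_var(), which collapse to a no-op on-stack mask when CONFIG_CPUMASK_OFFSTACK is off. A minimal sketch of that idiom (kernel context; the function name is illustrative):

	#include <linux/cpumask.h>
	#include <linux/gfp.h>

	static void debug_with_scratch_mask(void)
	{
		cpumask_var_t scratch;

		/* Allocation can only fail when CONFIG_CPUMASK_OFFSTACK=y. */
		if (!alloc_cpumask_var(&scratch, GFP_KERNEL))
			return;

		cpumask_clear(scratch);
		/* ... use scratch as temporary storage ... */

		free_cpumask_var(scratch);
	}
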
@@ -6775,7 +6735,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
6775 6735
6776static int sd_degenerate(struct sched_domain *sd) 6736static int sd_degenerate(struct sched_domain *sd)
6777{ 6737{
6778 if (cpus_weight(sd->span) == 1) 6738 if (cpumask_weight(sched_domain_span(sd)) == 1)
6779 return 1; 6739 return 1;
6780 6740
6781 /* Following flags need at least 2 groups */ 6741 /* Following flags need at least 2 groups */
@@ -6806,7 +6766,7 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
6806 if (sd_degenerate(parent)) 6766 if (sd_degenerate(parent))
6807 return 1; 6767 return 1;
6808 6768
6809 if (!cpus_equal(sd->span, parent->span)) 6769 if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
6810 return 0; 6770 return 0;
6811 6771
6812 /* Does parent contain flags not in child? */ 6772 /* Does parent contain flags not in child? */
@@ -6828,6 +6788,16 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
6828 return 1; 6788 return 1;
6829} 6789}
6830 6790
6791static void free_rootdomain(struct root_domain *rd)
6792{
6793 cpupri_cleanup(&rd->cpupri);
6794
6795 free_cpumask_var(rd->rto_mask);
6796 free_cpumask_var(rd->online);
6797 free_cpumask_var(rd->span);
6798 kfree(rd);
6799}
6800
6831static void rq_attach_root(struct rq *rq, struct root_domain *rd) 6801static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6832{ 6802{
6833 unsigned long flags; 6803 unsigned long flags;
@@ -6837,38 +6807,63 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
6837 if (rq->rd) { 6807 if (rq->rd) {
6838 struct root_domain *old_rd = rq->rd; 6808 struct root_domain *old_rd = rq->rd;
6839 6809
6840 if (cpu_isset(rq->cpu, old_rd->online)) 6810 if (cpumask_test_cpu(rq->cpu, old_rd->online))
6841 set_rq_offline(rq); 6811 set_rq_offline(rq);
6842 6812
6843 cpu_clear(rq->cpu, old_rd->span); 6813 cpumask_clear_cpu(rq->cpu, old_rd->span);
6844 6814
6845 if (atomic_dec_and_test(&old_rd->refcount)) 6815 if (atomic_dec_and_test(&old_rd->refcount))
6846 kfree(old_rd); 6816 free_rootdomain(old_rd);
6847 } 6817 }
6848 6818
6849 atomic_inc(&rd->refcount); 6819 atomic_inc(&rd->refcount);
6850 rq->rd = rd; 6820 rq->rd = rd;
6851 6821
6852 cpu_set(rq->cpu, rd->span); 6822 cpumask_set_cpu(rq->cpu, rd->span);
6853 if (cpu_isset(rq->cpu, cpu_online_map)) 6823 if (cpumask_test_cpu(rq->cpu, cpu_online_mask))
6854 set_rq_online(rq); 6824 set_rq_online(rq);
6855 6825
6856 spin_unlock_irqrestore(&rq->lock, flags); 6826 spin_unlock_irqrestore(&rq->lock, flags);
6857} 6827}
6858 6828
6859static void init_rootdomain(struct root_domain *rd) 6829static int init_rootdomain(struct root_domain *rd, bool bootmem)
6860{ 6830{
6861 memset(rd, 0, sizeof(*rd)); 6831 memset(rd, 0, sizeof(*rd));
6862 6832
6863 cpus_clear(rd->span); 6833 if (bootmem) {
6864 cpus_clear(rd->online); 6834 alloc_bootmem_cpumask_var(&def_root_domain.span);
6835 alloc_bootmem_cpumask_var(&def_root_domain.online);
6836 alloc_bootmem_cpumask_var(&def_root_domain.rto_mask);
6837 cpupri_init(&rd->cpupri, true);
6838 return 0;
6839 }
6840
6841 if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
6842 goto free_rd;
6843 if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
6844 goto free_span;
6845 if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
6846 goto free_online;
6847
6848 if (cpupri_init(&rd->cpupri, false) != 0)
6849 goto free_rto_mask;
6850 return 0;
6865 6851
6866 cpupri_init(&rd->cpupri); 6852free_rto_mask:
6853 free_cpumask_var(rd->rto_mask);
6854free_online:
6855 free_cpumask_var(rd->online);
6856free_span:
6857 free_cpumask_var(rd->span);
6858free_rd:
6859 kfree(rd);
6860 return -ENOMEM;
6867} 6861}
6868 6862
6869static void init_defrootdomain(void) 6863static void init_defrootdomain(void)
6870{ 6864{
6871 init_rootdomain(&def_root_domain); 6865 init_rootdomain(&def_root_domain, true);
6866
6872 atomic_set(&def_root_domain.refcount, 1); 6867 atomic_set(&def_root_domain.refcount, 1);
6873} 6868}
6874 6869
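
init_rootdomain() above gains a bootmem path plus a cascade of error labels that free each successfully allocated mask in reverse order. The same unwinding idiom, reduced to its skeleton (kernel context; the struct and names are hypothetical):

	#include <linux/cpumask.h>
	#include <linux/errno.h>
	#include <linux/gfp.h>

	/* Hypothetical container with three masks, freed in reverse order on error. */
	struct three_masks {
		cpumask_var_t a, b, c;
	};

	static int alloc_three_masks(struct three_masks *m)
	{
		if (!alloc_cpumask_var(&m->a, GFP_KERNEL))
			goto fail;
		if (!alloc_cpumask_var(&m->b, GFP_KERNEL))
			goto free_a;
		if (!alloc_cpumask_var(&m->c, GFP_KERNEL))
			goto free_b;
		return 0;

	free_b:
		free_cpumask_var(m->b);
	free_a:
		free_cpumask_var(m->a);
	fail:
		return -ENOMEM;
	}
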
@@ -6880,7 +6875,10 @@ static struct root_domain *alloc_rootdomain(void)
6880 if (!rd) 6875 if (!rd)
6881 return NULL; 6876 return NULL;
6882 6877
6883 init_rootdomain(rd); 6878 if (init_rootdomain(rd, false) != 0) {
6879 kfree(rd);
6880 return NULL;
6881 }
6884 6882
6885 return rd; 6883 return rd;
6886} 6884}
@@ -6922,19 +6920,12 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
6922} 6920}
6923 6921
6924/* cpus with isolated domains */ 6922/* cpus with isolated domains */
6925static cpumask_t cpu_isolated_map = CPU_MASK_NONE; 6923static cpumask_var_t cpu_isolated_map;
6926 6924
6927/* Setup the mask of cpus configured for isolated domains */ 6925/* Setup the mask of cpus configured for isolated domains */
6928static int __init isolated_cpu_setup(char *str) 6926static int __init isolated_cpu_setup(char *str)
6929{ 6927{
6930 static int __initdata ints[NR_CPUS]; 6928 cpulist_parse(str, *cpu_isolated_map);
6931 int i;
6932
6933 str = get_options(str, ARRAY_SIZE(ints), ints);
6934 cpus_clear(cpu_isolated_map);
6935 for (i = 1; i <= ints[0]; i++)
6936 if (ints[i] < NR_CPUS)
6937 cpu_set(ints[i], cpu_isolated_map);
6938 return 1; 6929 return 1;
6939} 6930}
6940 6931
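
The isolcpus= hook above shrinks from a hand-rolled get_options() loop to a single cpulist_parse() call into the new cpumask_var_t. A sketch of the parsing step under the same assumptions; the option name, variable and function are hypothetical, and the mask is passed by value (dereferenced) as in the hunk, since the pointer-taking form of cpulist_parse() came later:

	#include <linux/cpumask.h>
	#include <linux/init.h>

	static cpumask_var_t example_isolated;	/* assumed allocated during boot */

	static int __init example_isolated_setup(char *str)
	{
		/* Accepts list syntax such as "1-3,5"; return 1 to consume the option. */
		cpulist_parse(str, *example_isolated);
		return 1;
	}
	__setup("example_isolcpus=", example_isolated_setup);
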
@@ -6943,42 +6934,43 @@ __setup("isolcpus=", isolated_cpu_setup);
6943/* 6934/*
6944 * init_sched_build_groups takes the cpumask we wish to span, and a pointer 6935 * init_sched_build_groups takes the cpumask we wish to span, and a pointer
6945 * to a function which identifies what group(along with sched group) a CPU 6936 * to a function which identifies what group(along with sched group) a CPU
6946 * belongs to. The return value of group_fn must be a >= 0 and < NR_CPUS 6937 * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
6947 * (due to the fact that we keep track of groups covered with a cpumask_t). 6938 * (due to the fact that we keep track of groups covered with a struct cpumask).
6948 * 6939 *
6949 * init_sched_build_groups will build a circular linked list of the groups 6940 * init_sched_build_groups will build a circular linked list of the groups
6950 * covered by the given span, and will set each group's ->cpumask correctly, 6941 * covered by the given span, and will set each group's ->cpumask correctly,
6951 * and ->cpu_power to 0. 6942 * and ->cpu_power to 0.
6952 */ 6943 */
6953static void 6944static void
6954init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map, 6945init_sched_build_groups(const struct cpumask *span,
6955 int (*group_fn)(int cpu, const cpumask_t *cpu_map, 6946 const struct cpumask *cpu_map,
6947 int (*group_fn)(int cpu, const struct cpumask *cpu_map,
6956 struct sched_group **sg, 6948 struct sched_group **sg,
6957 cpumask_t *tmpmask), 6949 struct cpumask *tmpmask),
6958 cpumask_t *covered, cpumask_t *tmpmask) 6950 struct cpumask *covered, struct cpumask *tmpmask)
6959{ 6951{
6960 struct sched_group *first = NULL, *last = NULL; 6952 struct sched_group *first = NULL, *last = NULL;
6961 int i; 6953 int i;
6962 6954
6963 cpus_clear(*covered); 6955 cpumask_clear(covered);
6964 6956
6965 for_each_cpu_mask_nr(i, *span) { 6957 for_each_cpu(i, span) {
6966 struct sched_group *sg; 6958 struct sched_group *sg;
6967 int group = group_fn(i, cpu_map, &sg, tmpmask); 6959 int group = group_fn(i, cpu_map, &sg, tmpmask);
6968 int j; 6960 int j;
6969 6961
6970 if (cpu_isset(i, *covered)) 6962 if (cpumask_test_cpu(i, covered))
6971 continue; 6963 continue;
6972 6964
6973 cpus_clear(sg->cpumask); 6965 cpumask_clear(sched_group_cpus(sg));
6974 sg->__cpu_power = 0; 6966 sg->__cpu_power = 0;
6975 6967
6976 for_each_cpu_mask_nr(j, *span) { 6968 for_each_cpu(j, span) {
6977 if (group_fn(j, cpu_map, NULL, tmpmask) != group) 6969 if (group_fn(j, cpu_map, NULL, tmpmask) != group)
6978 continue; 6970 continue;
6979 6971
6980 cpu_set(j, *covered); 6972 cpumask_set_cpu(j, covered);
6981 cpu_set(j, sg->cpumask); 6973 cpumask_set_cpu(j, sched_group_cpus(sg));
6982 } 6974 }
6983 if (!first) 6975 if (!first)
6984 first = sg; 6976 first = sg;
@@ -7042,9 +7034,10 @@ static int find_next_best_node(int node, nodemask_t *used_nodes)
7042 * should be one that prevents unnecessary balancing, but also spreads tasks 7034 * should be one that prevents unnecessary balancing, but also spreads tasks
7043 * out optimally. 7035 * out optimally.
7044 */ 7036 */
7045static void sched_domain_node_span(int node, cpumask_t *span) 7037static void sched_domain_node_span(int node, struct cpumask *span)
7046{ 7038{
7047 nodemask_t used_nodes; 7039 nodemask_t used_nodes;
7040 /* FIXME: use cpumask_of_node() */
7048 node_to_cpumask_ptr(nodemask, node); 7041 node_to_cpumask_ptr(nodemask, node);
7049 int i; 7042 int i;
7050 7043
@@ -7066,18 +7059,33 @@ static void sched_domain_node_span(int node, cpumask_t *span)
7066int sched_smt_power_savings = 0, sched_mc_power_savings = 0; 7059int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
7067 7060
7068/* 7061/*
7062 * The cpus mask in sched_group and sched_domain hangs off the end.
7063 * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
7064 * for nr_cpu_ids < CONFIG_NR_CPUS.
7065 */
7066struct static_sched_group {
7067 struct sched_group sg;
7068 DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);
7069};
7070
7071struct static_sched_domain {
7072 struct sched_domain sd;
7073 DECLARE_BITMAP(span, CONFIG_NR_CPUS);
7074};
7075
7076/*
7069 * SMT sched-domains: 7077 * SMT sched-domains:
7070 */ 7078 */
7071#ifdef CONFIG_SCHED_SMT 7079#ifdef CONFIG_SCHED_SMT
7072static DEFINE_PER_CPU(struct sched_domain, cpu_domains); 7080static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
7073static DEFINE_PER_CPU(struct sched_group, sched_group_cpus); 7081static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus);
7074 7082
7075static int 7083static int
7076cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7084cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
7077 cpumask_t *unused) 7085 struct sched_group **sg, struct cpumask *unused)
7078{ 7086{
7079 if (sg) 7087 if (sg)
7080 *sg = &per_cpu(sched_group_cpus, cpu); 7088 *sg = &per_cpu(sched_group_cpus, cpu).sg;
7081 return cpu; 7089 return cpu;
7082} 7090}
7083#endif /* CONFIG_SCHED_SMT */ 7091#endif /* CONFIG_SCHED_SMT */
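
The static_sched_group/static_sched_domain wrappers introduced above append a CONFIG_NR_CPUS bitmap to each per-cpu object, because sched_group and sched_domain now keep their CPU mask in storage hanging off the end of the structure. A minimal sketch of that trailing-bitmap layout and the to_cpumask() view it relies on (illustrative struct, kernel context assumed):

	#include <linux/cpumask.h>
	#include <linux/types.h>

	/* Hypothetical wrapper mirroring static_sched_group's layout. */
	struct wrapped_group {
		int power;				/* stand-in for the real fields */
		DECLARE_BITMAP(cpus, CONFIG_NR_CPUS);	/* backing storage for the mask */
	};

	static struct cpumask *wrapped_group_cpus(struct wrapped_group *wg)
	{
		/* to_cpumask() views the raw bitmap as a struct cpumask. */
		return to_cpumask(wg->cpus);
	}
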
@@ -7086,56 +7094,55 @@ cpu_to_cpu_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg,
7086 * multi-core sched-domains: 7094 * multi-core sched-domains:
7087 */ 7095 */
7088#ifdef CONFIG_SCHED_MC 7096#ifdef CONFIG_SCHED_MC
7089static DEFINE_PER_CPU(struct sched_domain, core_domains); 7097static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
7090static DEFINE_PER_CPU(struct sched_group, sched_group_core); 7098static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
7091#endif /* CONFIG_SCHED_MC */ 7099#endif /* CONFIG_SCHED_MC */
7092 7100
7093#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) 7101#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
7094static int 7102static int
7095cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7103cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
7096 cpumask_t *mask) 7104 struct sched_group **sg, struct cpumask *mask)
7097{ 7105{
7098 int group; 7106 int group;
7099 7107
7100 *mask = per_cpu(cpu_sibling_map, cpu); 7108 cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
7101 cpus_and(*mask, *mask, *cpu_map); 7109 group = cpumask_first(mask);
7102 group = first_cpu(*mask);
7103 if (sg) 7110 if (sg)
7104 *sg = &per_cpu(sched_group_core, group); 7111 *sg = &per_cpu(sched_group_core, group).sg;
7105 return group; 7112 return group;
7106} 7113}
7107#elif defined(CONFIG_SCHED_MC) 7114#elif defined(CONFIG_SCHED_MC)
7108static int 7115static int
7109cpu_to_core_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7116cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
7110 cpumask_t *unused) 7117 struct sched_group **sg, struct cpumask *unused)
7111{ 7118{
7112 if (sg) 7119 if (sg)
7113 *sg = &per_cpu(sched_group_core, cpu); 7120 *sg = &per_cpu(sched_group_core, cpu).sg;
7114 return cpu; 7121 return cpu;
7115} 7122}
7116#endif 7123#endif
7117 7124
7118static DEFINE_PER_CPU(struct sched_domain, phys_domains); 7125static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
7119static DEFINE_PER_CPU(struct sched_group, sched_group_phys); 7126static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
7120 7127
7121static int 7128static int
7122cpu_to_phys_group(int cpu, const cpumask_t *cpu_map, struct sched_group **sg, 7129cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
7123 cpumask_t *mask) 7130 struct sched_group **sg, struct cpumask *mask)
7124{ 7131{
7125 int group; 7132 int group;
7126#ifdef CONFIG_SCHED_MC 7133#ifdef CONFIG_SCHED_MC
7134 /* FIXME: Use cpu_coregroup_mask. */
7127 *mask = cpu_coregroup_map(cpu); 7135 *mask = cpu_coregroup_map(cpu);
7128 cpus_and(*mask, *mask, *cpu_map); 7136 cpus_and(*mask, *mask, *cpu_map);
7129 group = first_cpu(*mask); 7137 group = cpumask_first(mask);
7130#elif defined(CONFIG_SCHED_SMT) 7138#elif defined(CONFIG_SCHED_SMT)
7131 *mask = per_cpu(cpu_sibling_map, cpu); 7139 cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
7132 cpus_and(*mask, *mask, *cpu_map); 7140 group = cpumask_first(mask);
7133 group = first_cpu(*mask);
7134#else 7141#else
7135 group = cpu; 7142 group = cpu;
7136#endif 7143#endif
7137 if (sg) 7144 if (sg)
7138 *sg = &per_cpu(sched_group_phys, group); 7145 *sg = &per_cpu(sched_group_phys, group).sg;
7139 return group; 7146 return group;
7140} 7147}
7141 7148
@@ -7149,19 +7156,21 @@ static DEFINE_PER_CPU(struct sched_domain, node_domains);
7149static struct sched_group ***sched_group_nodes_bycpu; 7156static struct sched_group ***sched_group_nodes_bycpu;
7150 7157
7151static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); 7158static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
7152static DEFINE_PER_CPU(struct sched_group, sched_group_allnodes); 7159static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
7153 7160
7154static int cpu_to_allnodes_group(int cpu, const cpumask_t *cpu_map, 7161static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
7155 struct sched_group **sg, cpumask_t *nodemask) 7162 struct sched_group **sg,
7163 struct cpumask *nodemask)
7156{ 7164{
7157 int group; 7165 int group;
7166 /* FIXME: use cpumask_of_node */
7167 node_to_cpumask_ptr(pnodemask, cpu_to_node(cpu));
7158 7168
7159 *nodemask = node_to_cpumask(cpu_to_node(cpu)); 7169 cpumask_and(nodemask, pnodemask, cpu_map);
7160 cpus_and(*nodemask, *nodemask, *cpu_map); 7170 group = cpumask_first(nodemask);
7161 group = first_cpu(*nodemask);
7162 7171
7163 if (sg) 7172 if (sg)
7164 *sg = &per_cpu(sched_group_allnodes, group); 7173 *sg = &per_cpu(sched_group_allnodes, group).sg;
7165 return group; 7174 return group;
7166} 7175}
7167 7176
@@ -7173,11 +7182,11 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
7173 if (!sg) 7182 if (!sg)
7174 return; 7183 return;
7175 do { 7184 do {
7176 for_each_cpu_mask_nr(j, sg->cpumask) { 7185 for_each_cpu(j, sched_group_cpus(sg)) {
7177 struct sched_domain *sd; 7186 struct sched_domain *sd;
7178 7187
7179 sd = &per_cpu(phys_domains, j); 7188 sd = &per_cpu(phys_domains, j).sd;
7180 if (j != first_cpu(sd->groups->cpumask)) { 7189 if (j != cpumask_first(sched_group_cpus(sd->groups))) {
7181 /* 7190 /*
7182 * Only add "power" once for each 7191 * Only add "power" once for each
7183 * physical package. 7192 * physical package.
@@ -7194,11 +7203,12 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
7194 7203
7195#ifdef CONFIG_NUMA 7204#ifdef CONFIG_NUMA
7196/* Free memory allocated for various sched_group structures */ 7205/* Free memory allocated for various sched_group structures */
7197static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) 7206static void free_sched_groups(const struct cpumask *cpu_map,
7207 struct cpumask *nodemask)
7198{ 7208{
7199 int cpu, i; 7209 int cpu, i;
7200 7210
7201 for_each_cpu_mask_nr(cpu, *cpu_map) { 7211 for_each_cpu(cpu, cpu_map) {
7202 struct sched_group **sched_group_nodes 7212 struct sched_group **sched_group_nodes
7203 = sched_group_nodes_bycpu[cpu]; 7213 = sched_group_nodes_bycpu[cpu];
7204 7214
@@ -7207,10 +7217,11 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
7207 7217
7208 for (i = 0; i < nr_node_ids; i++) { 7218 for (i = 0; i < nr_node_ids; i++) {
7209 struct sched_group *oldsg, *sg = sched_group_nodes[i]; 7219 struct sched_group *oldsg, *sg = sched_group_nodes[i];
7220 /* FIXME: Use cpumask_of_node */
7221 node_to_cpumask_ptr(pnodemask, i);
7210 7222
7211 *nodemask = node_to_cpumask(i); 7223 cpus_and(*nodemask, *pnodemask, *cpu_map);
7212 cpus_and(*nodemask, *nodemask, *cpu_map); 7224 if (cpumask_empty(nodemask))
7213 if (cpus_empty(*nodemask))
7214 continue; 7225 continue;
7215 7226
7216 if (sg == NULL) 7227 if (sg == NULL)
@@ -7228,7 +7239,8 @@ next_sg:
7228 } 7239 }
7229} 7240}
7230#else /* !CONFIG_NUMA */ 7241#else /* !CONFIG_NUMA */
7231static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask) 7242static void free_sched_groups(const struct cpumask *cpu_map,
7243 struct cpumask *nodemask)
7232{ 7244{
7233} 7245}
7234#endif /* CONFIG_NUMA */ 7246#endif /* CONFIG_NUMA */
@@ -7254,7 +7266,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
7254 7266
7255 WARN_ON(!sd || !sd->groups); 7267 WARN_ON(!sd || !sd->groups);
7256 7268
7257 if (cpu != first_cpu(sd->groups->cpumask)) 7269 if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
7258 return; 7270 return;
7259 7271
7260 child = sd->child; 7272 child = sd->child;
@@ -7319,40 +7331,6 @@ SD_INIT_FUNC(CPU)
7319 SD_INIT_FUNC(MC) 7331 SD_INIT_FUNC(MC)
7320#endif 7332#endif
7321 7333
7322/*
7323 * To minimize stack usage kmalloc room for cpumasks and share the
7324 * space as the usage in build_sched_domains() dictates. Used only
7325 * if the amount of space is significant.
7326 */
7327struct allmasks {
7328 cpumask_t tmpmask; /* make this one first */
7329 union {
7330 cpumask_t nodemask;
7331 cpumask_t this_sibling_map;
7332 cpumask_t this_core_map;
7333 };
7334 cpumask_t send_covered;
7335
7336#ifdef CONFIG_NUMA
7337 cpumask_t domainspan;
7338 cpumask_t covered;
7339 cpumask_t notcovered;
7340#endif
7341};
7342
7343#if NR_CPUS > 128
7344#define SCHED_CPUMASK_ALLOC 1
7345#define SCHED_CPUMASK_FREE(v) kfree(v)
7346#define SCHED_CPUMASK_DECLARE(v) struct allmasks *v
7347#else
7348#define SCHED_CPUMASK_ALLOC 0
7349#define SCHED_CPUMASK_FREE(v)
7350#define SCHED_CPUMASK_DECLARE(v) struct allmasks _v, *v = &_v
7351#endif
7352
7353#define SCHED_CPUMASK_VAR(v, a) cpumask_t *v = (cpumask_t *) \
7354 ((unsigned long)(a) + offsetof(struct allmasks, v))
7355
7356static int default_relax_domain_level = -1; 7334static int default_relax_domain_level = -1;
7357 7335
7358static int __init setup_relax_domain_level(char *str) 7336static int __init setup_relax_domain_level(char *str)
@@ -7392,17 +7370,38 @@ static void set_domain_attribute(struct sched_domain *sd,
7392 * Build sched domains for a given set of cpus and attach the sched domains 7370 * Build sched domains for a given set of cpus and attach the sched domains
7393 * to the individual cpus 7371 * to the individual cpus
7394 */ 7372 */
7395static int __build_sched_domains(const cpumask_t *cpu_map, 7373static int __build_sched_domains(const struct cpumask *cpu_map,
7396 struct sched_domain_attr *attr) 7374 struct sched_domain_attr *attr)
7397{ 7375{
7398 int i; 7376 int i, err = -ENOMEM;
7399 struct root_domain *rd; 7377 struct root_domain *rd;
7400 SCHED_CPUMASK_DECLARE(allmasks); 7378 cpumask_var_t nodemask, this_sibling_map, this_core_map, send_covered,
7401 cpumask_t *tmpmask; 7379 tmpmask;
7402#ifdef CONFIG_NUMA 7380#ifdef CONFIG_NUMA
7381 cpumask_var_t domainspan, covered, notcovered;
7403 struct sched_group **sched_group_nodes = NULL; 7382 struct sched_group **sched_group_nodes = NULL;
7404 int sd_allnodes = 0; 7383 int sd_allnodes = 0;
7405 7384
7385 if (!alloc_cpumask_var(&domainspan, GFP_KERNEL))
7386 goto out;
7387 if (!alloc_cpumask_var(&covered, GFP_KERNEL))
7388 goto free_domainspan;
7389 if (!alloc_cpumask_var(&notcovered, GFP_KERNEL))
7390 goto free_covered;
7391#endif
7392
7393 if (!alloc_cpumask_var(&nodemask, GFP_KERNEL))
7394 goto free_notcovered;
7395 if (!alloc_cpumask_var(&this_sibling_map, GFP_KERNEL))
7396 goto free_nodemask;
7397 if (!alloc_cpumask_var(&this_core_map, GFP_KERNEL))
7398 goto free_this_sibling_map;
7399 if (!alloc_cpumask_var(&send_covered, GFP_KERNEL))
7400 goto free_this_core_map;
7401 if (!alloc_cpumask_var(&tmpmask, GFP_KERNEL))
7402 goto free_send_covered;
7403
7404#ifdef CONFIG_NUMA
7406 /* 7405 /*
7407 * Allocate the per-node list of sched groups 7406 * Allocate the per-node list of sched groups
7408 */ 7407 */
@@ -7410,55 +7409,37 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7410 GFP_KERNEL); 7409 GFP_KERNEL);
7411 if (!sched_group_nodes) { 7410 if (!sched_group_nodes) {
7412 printk(KERN_WARNING "Can not alloc sched group node list\n"); 7411 printk(KERN_WARNING "Can not alloc sched group node list\n");
7413 return -ENOMEM; 7412 goto free_tmpmask;
7414 } 7413 }
7415#endif 7414#endif
7416 7415
7417 rd = alloc_rootdomain(); 7416 rd = alloc_rootdomain();
7418 if (!rd) { 7417 if (!rd) {
7419 printk(KERN_WARNING "Cannot alloc root domain\n"); 7418 printk(KERN_WARNING "Cannot alloc root domain\n");
7420#ifdef CONFIG_NUMA 7419 goto free_sched_groups;
7421 kfree(sched_group_nodes);
7422#endif
7423 return -ENOMEM;
7424 }
7425
7426#if SCHED_CPUMASK_ALLOC
7427 /* get space for all scratch cpumask variables */
7428 allmasks = kmalloc(sizeof(*allmasks), GFP_KERNEL);
7429 if (!allmasks) {
7430 printk(KERN_WARNING "Cannot alloc cpumask array\n");
7431 kfree(rd);
7432#ifdef CONFIG_NUMA
7433 kfree(sched_group_nodes);
7434#endif
7435 return -ENOMEM;
7436 } 7420 }
7437#endif
7438 tmpmask = (cpumask_t *)allmasks;
7439
7440 7421
7441#ifdef CONFIG_NUMA 7422#ifdef CONFIG_NUMA
7442 sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes; 7423 sched_group_nodes_bycpu[cpumask_first(cpu_map)] = sched_group_nodes;
7443#endif 7424#endif
7444 7425
7445 /* 7426 /*
7446 * Set up domains for cpus specified by the cpu_map. 7427 * Set up domains for cpus specified by the cpu_map.
7447 */ 7428 */
7448 for_each_cpu_mask_nr(i, *cpu_map) { 7429 for_each_cpu(i, cpu_map) {
7449 struct sched_domain *sd = NULL, *p; 7430 struct sched_domain *sd = NULL, *p;
7450 SCHED_CPUMASK_VAR(nodemask, allmasks);
7451 7431
7432 /* FIXME: use cpumask_of_node */
7452 *nodemask = node_to_cpumask(cpu_to_node(i)); 7433 *nodemask = node_to_cpumask(cpu_to_node(i));
7453 cpus_and(*nodemask, *nodemask, *cpu_map); 7434 cpus_and(*nodemask, *nodemask, *cpu_map);
7454 7435
7455#ifdef CONFIG_NUMA 7436#ifdef CONFIG_NUMA
7456 if (cpus_weight(*cpu_map) > 7437 if (cpumask_weight(cpu_map) >
7457 SD_NODES_PER_DOMAIN*cpus_weight(*nodemask)) { 7438 SD_NODES_PER_DOMAIN*cpumask_weight(nodemask)) {
7458 sd = &per_cpu(allnodes_domains, i); 7439 sd = &per_cpu(allnodes_domains, i);
7459 SD_INIT(sd, ALLNODES); 7440 SD_INIT(sd, ALLNODES);
7460 set_domain_attribute(sd, attr); 7441 set_domain_attribute(sd, attr);
7461 sd->span = *cpu_map; 7442 cpumask_copy(sched_domain_span(sd), cpu_map);
7462 cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask); 7443 cpu_to_allnodes_group(i, cpu_map, &sd->groups, tmpmask);
7463 p = sd; 7444 p = sd;
7464 sd_allnodes = 1; 7445 sd_allnodes = 1;
@@ -7468,18 +7449,19 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7468 sd = &per_cpu(node_domains, i); 7449 sd = &per_cpu(node_domains, i);
7469 SD_INIT(sd, NODE); 7450 SD_INIT(sd, NODE);
7470 set_domain_attribute(sd, attr); 7451 set_domain_attribute(sd, attr);
7471 sched_domain_node_span(cpu_to_node(i), &sd->span); 7452 sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
7472 sd->parent = p; 7453 sd->parent = p;
7473 if (p) 7454 if (p)
7474 p->child = sd; 7455 p->child = sd;
7475 cpus_and(sd->span, sd->span, *cpu_map); 7456 cpumask_and(sched_domain_span(sd),
7457 sched_domain_span(sd), cpu_map);
7476#endif 7458#endif
7477 7459
7478 p = sd; 7460 p = sd;
7479 sd = &per_cpu(phys_domains, i); 7461 sd = &per_cpu(phys_domains, i).sd;
7480 SD_INIT(sd, CPU); 7462 SD_INIT(sd, CPU);
7481 set_domain_attribute(sd, attr); 7463 set_domain_attribute(sd, attr);
7482 sd->span = *nodemask; 7464 cpumask_copy(sched_domain_span(sd), nodemask);
7483 sd->parent = p; 7465 sd->parent = p;
7484 if (p) 7466 if (p)
7485 p->child = sd; 7467 p->child = sd;
@@ -7487,11 +7469,12 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7487 7469
7488#ifdef CONFIG_SCHED_MC 7470#ifdef CONFIG_SCHED_MC
7489 p = sd; 7471 p = sd;
7490 sd = &per_cpu(core_domains, i); 7472 sd = &per_cpu(core_domains, i).sd;
7491 SD_INIT(sd, MC); 7473 SD_INIT(sd, MC);
7492 set_domain_attribute(sd, attr); 7474 set_domain_attribute(sd, attr);
7493 sd->span = cpu_coregroup_map(i); 7475 *sched_domain_span(sd) = cpu_coregroup_map(i);
7494 cpus_and(sd->span, sd->span, *cpu_map); 7476 cpumask_and(sched_domain_span(sd),
7477 sched_domain_span(sd), cpu_map);
7495 sd->parent = p; 7478 sd->parent = p;
7496 p->child = sd; 7479 p->child = sd;
7497 cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask); 7480 cpu_to_core_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7499,11 +7482,11 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7499 7482
7500#ifdef CONFIG_SCHED_SMT 7483#ifdef CONFIG_SCHED_SMT
7501 p = sd; 7484 p = sd;
7502 sd = &per_cpu(cpu_domains, i); 7485 sd = &per_cpu(cpu_domains, i).sd;
7503 SD_INIT(sd, SIBLING); 7486 SD_INIT(sd, SIBLING);
7504 set_domain_attribute(sd, attr); 7487 set_domain_attribute(sd, attr);
7505 sd->span = per_cpu(cpu_sibling_map, i); 7488 cpumask_and(sched_domain_span(sd),
7506 cpus_and(sd->span, sd->span, *cpu_map); 7489 &per_cpu(cpu_sibling_map, i), cpu_map);
7507 sd->parent = p; 7490 sd->parent = p;
7508 p->child = sd; 7491 p->child = sd;
7509 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask); 7492 cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
@@ -7512,13 +7495,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7512 7495
7513#ifdef CONFIG_SCHED_SMT 7496#ifdef CONFIG_SCHED_SMT
7514 /* Set up CPU (sibling) groups */ 7497 /* Set up CPU (sibling) groups */
7515 for_each_cpu_mask_nr(i, *cpu_map) { 7498 for_each_cpu(i, cpu_map) {
7516 SCHED_CPUMASK_VAR(this_sibling_map, allmasks); 7499 cpumask_and(this_sibling_map,
7517 SCHED_CPUMASK_VAR(send_covered, allmasks); 7500 &per_cpu(cpu_sibling_map, i), cpu_map);
7518 7501 if (i != cpumask_first(this_sibling_map))
7519 *this_sibling_map = per_cpu(cpu_sibling_map, i);
7520 cpus_and(*this_sibling_map, *this_sibling_map, *cpu_map);
7521 if (i != first_cpu(*this_sibling_map))
7522 continue; 7502 continue;
7523 7503
7524 init_sched_build_groups(this_sibling_map, cpu_map, 7504 init_sched_build_groups(this_sibling_map, cpu_map,
@@ -7529,13 +7509,11 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7529 7509
7530#ifdef CONFIG_SCHED_MC 7510#ifdef CONFIG_SCHED_MC
7531 /* Set up multi-core groups */ 7511 /* Set up multi-core groups */
7532 for_each_cpu_mask_nr(i, *cpu_map) { 7512 for_each_cpu(i, cpu_map) {
7533 SCHED_CPUMASK_VAR(this_core_map, allmasks); 7513 /* FIXME: Use cpu_coregroup_mask */
7534 SCHED_CPUMASK_VAR(send_covered, allmasks);
7535
7536 *this_core_map = cpu_coregroup_map(i); 7514 *this_core_map = cpu_coregroup_map(i);
7537 cpus_and(*this_core_map, *this_core_map, *cpu_map); 7515 cpus_and(*this_core_map, *this_core_map, *cpu_map);
7538 if (i != first_cpu(*this_core_map)) 7516 if (i != cpumask_first(this_core_map))
7539 continue; 7517 continue;
7540 7518
7541 init_sched_build_groups(this_core_map, cpu_map, 7519 init_sched_build_groups(this_core_map, cpu_map,
@@ -7546,12 +7524,10 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7546 7524
7547 /* Set up physical groups */ 7525 /* Set up physical groups */
7548 for (i = 0; i < nr_node_ids; i++) { 7526 for (i = 0; i < nr_node_ids; i++) {
7549 SCHED_CPUMASK_VAR(nodemask, allmasks); 7527 /* FIXME: Use cpumask_of_node */
7550 SCHED_CPUMASK_VAR(send_covered, allmasks);
7551
7552 *nodemask = node_to_cpumask(i); 7528 *nodemask = node_to_cpumask(i);
7553 cpus_and(*nodemask, *nodemask, *cpu_map); 7529 cpus_and(*nodemask, *nodemask, *cpu_map);
7554 if (cpus_empty(*nodemask)) 7530 if (cpumask_empty(nodemask))
7555 continue; 7531 continue;
7556 7532
7557 init_sched_build_groups(nodemask, cpu_map, 7533 init_sched_build_groups(nodemask, cpu_map,
@@ -7562,8 +7538,6 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7562#ifdef CONFIG_NUMA 7538#ifdef CONFIG_NUMA
7563 /* Set up node groups */ 7539 /* Set up node groups */
7564 if (sd_allnodes) { 7540 if (sd_allnodes) {
7565 SCHED_CPUMASK_VAR(send_covered, allmasks);
7566
7567 init_sched_build_groups(cpu_map, cpu_map, 7541 init_sched_build_groups(cpu_map, cpu_map,
7568 &cpu_to_allnodes_group, 7542 &cpu_to_allnodes_group,
7569 send_covered, tmpmask); 7543 send_covered, tmpmask);
@@ -7572,58 +7546,58 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7572 for (i = 0; i < nr_node_ids; i++) { 7546 for (i = 0; i < nr_node_ids; i++) {
7573 /* Set up node groups */ 7547 /* Set up node groups */
7574 struct sched_group *sg, *prev; 7548 struct sched_group *sg, *prev;
7575 SCHED_CPUMASK_VAR(nodemask, allmasks);
7576 SCHED_CPUMASK_VAR(domainspan, allmasks);
7577 SCHED_CPUMASK_VAR(covered, allmasks);
7578 int j; 7549 int j;
7579 7550
7551 /* FIXME: Use cpumask_of_node */
7580 *nodemask = node_to_cpumask(i); 7552 *nodemask = node_to_cpumask(i);
7581 cpus_clear(*covered); 7553 cpumask_clear(covered);
7582 7554
7583 cpus_and(*nodemask, *nodemask, *cpu_map); 7555 cpus_and(*nodemask, *nodemask, *cpu_map);
7584 if (cpus_empty(*nodemask)) { 7556 if (cpumask_empty(nodemask)) {
7585 sched_group_nodes[i] = NULL; 7557 sched_group_nodes[i] = NULL;
7586 continue; 7558 continue;
7587 } 7559 }
7588 7560
7589 sched_domain_node_span(i, domainspan); 7561 sched_domain_node_span(i, domainspan);
7590 cpus_and(*domainspan, *domainspan, *cpu_map); 7562 cpumask_and(domainspan, domainspan, cpu_map);
7591 7563
7592 sg = kmalloc_node(sizeof(struct sched_group), GFP_KERNEL, i); 7564 sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
7565 GFP_KERNEL, i);
7593 if (!sg) { 7566 if (!sg) {
7594 printk(KERN_WARNING "Can not alloc domain group for " 7567 printk(KERN_WARNING "Can not alloc domain group for "
7595 "node %d\n", i); 7568 "node %d\n", i);
7596 goto error; 7569 goto error;
7597 } 7570 }
7598 sched_group_nodes[i] = sg; 7571 sched_group_nodes[i] = sg;
7599 for_each_cpu_mask_nr(j, *nodemask) { 7572 for_each_cpu(j, nodemask) {
7600 struct sched_domain *sd; 7573 struct sched_domain *sd;
7601 7574
7602 sd = &per_cpu(node_domains, j); 7575 sd = &per_cpu(node_domains, j);
7603 sd->groups = sg; 7576 sd->groups = sg;
7604 } 7577 }
7605 sg->__cpu_power = 0; 7578 sg->__cpu_power = 0;
7606 sg->cpumask = *nodemask; 7579 cpumask_copy(sched_group_cpus(sg), nodemask);
7607 sg->next = sg; 7580 sg->next = sg;
7608 cpus_or(*covered, *covered, *nodemask); 7581 cpumask_or(covered, covered, nodemask);
7609 prev = sg; 7582 prev = sg;
7610 7583
7611 for (j = 0; j < nr_node_ids; j++) { 7584 for (j = 0; j < nr_node_ids; j++) {
7612 SCHED_CPUMASK_VAR(notcovered, allmasks);
7613 int n = (i + j) % nr_node_ids; 7585 int n = (i + j) % nr_node_ids;
7586 /* FIXME: Use cpumask_of_node */
7614 node_to_cpumask_ptr(pnodemask, n); 7587 node_to_cpumask_ptr(pnodemask, n);
7615 7588
7616 cpus_complement(*notcovered, *covered); 7589 cpumask_complement(notcovered, covered);
7617 cpus_and(*tmpmask, *notcovered, *cpu_map); 7590 cpumask_and(tmpmask, notcovered, cpu_map);
7618 cpus_and(*tmpmask, *tmpmask, *domainspan); 7591 cpumask_and(tmpmask, tmpmask, domainspan);
7619 if (cpus_empty(*tmpmask)) 7592 if (cpumask_empty(tmpmask))
7620 break; 7593 break;
7621 7594
7622 cpus_and(*tmpmask, *tmpmask, *pnodemask); 7595 cpumask_and(tmpmask, tmpmask, pnodemask);
7623 if (cpus_empty(*tmpmask)) 7596 if (cpumask_empty(tmpmask))
7624 continue; 7597 continue;
7625 7598
7626 sg = kmalloc_node(sizeof(struct sched_group), 7599 sg = kmalloc_node(sizeof(struct sched_group) +
7600 cpumask_size(),
7627 GFP_KERNEL, i); 7601 GFP_KERNEL, i);
7628 if (!sg) { 7602 if (!sg) {
7629 printk(KERN_WARNING 7603 printk(KERN_WARNING
@@ -7631,9 +7605,9 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7631 goto error; 7605 goto error;
7632 } 7606 }
7633 sg->__cpu_power = 0; 7607 sg->__cpu_power = 0;
7634 sg->cpumask = *tmpmask; 7608 cpumask_copy(sched_group_cpus(sg), tmpmask);
7635 sg->next = prev->next; 7609 sg->next = prev->next;
7636 cpus_or(*covered, *covered, *tmpmask); 7610 cpumask_or(covered, covered, tmpmask);
7637 prev->next = sg; 7611 prev->next = sg;
7638 prev = sg; 7612 prev = sg;
7639 } 7613 }
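
The kmalloc_node() calls above now reserve sizeof(struct sched_group) + cpumask_size() bytes so the group's CPU mask lands in trailing storage rather than a fixed cpumask_t member. A sketch of allocating an object with such a trailing mask (hypothetical struct and helper, kernel context assumed):

	#include <linux/cpumask.h>
	#include <linux/slab.h>

	/* Hypothetical node group with its CPU mask in trailing storage. */
	struct node_group {
		struct node_group *next;
		unsigned long cpumask[];	/* sized at allocation time */
	};

	static struct node_group *alloc_node_group(int node)
	{
		struct node_group *ng;

		ng = kmalloc_node(sizeof(*ng) + cpumask_size(), GFP_KERNEL, node);
		if (ng)
			cpumask_clear(to_cpumask(ng->cpumask));
		return ng;
	}
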
@@ -7642,22 +7616,22 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7642 7616
7643 /* Calculate CPU power for physical packages and nodes */ 7617 /* Calculate CPU power for physical packages and nodes */
7644#ifdef CONFIG_SCHED_SMT 7618#ifdef CONFIG_SCHED_SMT
7645 for_each_cpu_mask_nr(i, *cpu_map) { 7619 for_each_cpu(i, cpu_map) {
7646 struct sched_domain *sd = &per_cpu(cpu_domains, i); 7620 struct sched_domain *sd = &per_cpu(cpu_domains, i).sd;
7647 7621
7648 init_sched_groups_power(i, sd); 7622 init_sched_groups_power(i, sd);
7649 } 7623 }
7650#endif 7624#endif
7651#ifdef CONFIG_SCHED_MC 7625#ifdef CONFIG_SCHED_MC
7652 for_each_cpu_mask_nr(i, *cpu_map) { 7626 for_each_cpu(i, cpu_map) {
7653 struct sched_domain *sd = &per_cpu(core_domains, i); 7627 struct sched_domain *sd = &per_cpu(core_domains, i).sd;
7654 7628
7655 init_sched_groups_power(i, sd); 7629 init_sched_groups_power(i, sd);
7656 } 7630 }
7657#endif 7631#endif
7658 7632
7659 for_each_cpu_mask_nr(i, *cpu_map) { 7633 for_each_cpu(i, cpu_map) {
7660 struct sched_domain *sd = &per_cpu(phys_domains, i); 7634 struct sched_domain *sd = &per_cpu(phys_domains, i).sd;
7661 7635
7662 init_sched_groups_power(i, sd); 7636 init_sched_groups_power(i, sd);
7663 } 7637 }
@@ -7669,53 +7643,78 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
7669 if (sd_allnodes) { 7643 if (sd_allnodes) {
7670 struct sched_group *sg; 7644 struct sched_group *sg;
7671 7645
7672 cpu_to_allnodes_group(first_cpu(*cpu_map), cpu_map, &sg, 7646 cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
7673 tmpmask); 7647 tmpmask);
7674 init_numa_sched_groups_power(sg); 7648 init_numa_sched_groups_power(sg);
7675 } 7649 }
7676#endif 7650#endif
7677 7651
7678 /* Attach the domains */ 7652 /* Attach the domains */
7679 for_each_cpu_mask_nr(i, *cpu_map) { 7653 for_each_cpu(i, cpu_map) {
7680 struct sched_domain *sd; 7654 struct sched_domain *sd;
7681#ifdef CONFIG_SCHED_SMT 7655#ifdef CONFIG_SCHED_SMT
7682 sd = &per_cpu(cpu_domains, i); 7656 sd = &per_cpu(cpu_domains, i).sd;
7683#elif defined(CONFIG_SCHED_MC) 7657#elif defined(CONFIG_SCHED_MC)
7684 sd = &per_cpu(core_domains, i); 7658 sd = &per_cpu(core_domains, i).sd;
7685#else 7659#else
7686 sd = &per_cpu(phys_domains, i); 7660 sd = &per_cpu(phys_domains, i).sd;
7687#endif 7661#endif
7688 cpu_attach_domain(sd, rd, i); 7662 cpu_attach_domain(sd, rd, i);
7689 } 7663 }
7690 7664
7691 SCHED_CPUMASK_FREE((void *)allmasks); 7665 err = 0;
7692 return 0; 7666
7667free_tmpmask:
7668 free_cpumask_var(tmpmask);
7669free_send_covered:
7670 free_cpumask_var(send_covered);
7671free_this_core_map:
7672 free_cpumask_var(this_core_map);
7673free_this_sibling_map:
7674 free_cpumask_var(this_sibling_map);
7675free_nodemask:
7676 free_cpumask_var(nodemask);
7677free_notcovered:
7678#ifdef CONFIG_NUMA
7679 free_cpumask_var(notcovered);
7680free_covered:
7681 free_cpumask_var(covered);
7682free_domainspan:
7683 free_cpumask_var(domainspan);
7684out:
7685#endif
7686 return err;
7687
7688free_sched_groups:
7689#ifdef CONFIG_NUMA
7690 kfree(sched_group_nodes);
7691#endif
7692 goto free_tmpmask;
7693 7693
7694#ifdef CONFIG_NUMA 7694#ifdef CONFIG_NUMA
7695error: 7695error:
7696 free_sched_groups(cpu_map, tmpmask); 7696 free_sched_groups(cpu_map, tmpmask);
7697 SCHED_CPUMASK_FREE((void *)allmasks); 7697 free_rootdomain(rd);
7698 kfree(rd); 7698 goto free_tmpmask;
7699 return -ENOMEM;
7700#endif 7699#endif
7701} 7700}
7702 7701
7703static int build_sched_domains(const cpumask_t *cpu_map) 7702static int build_sched_domains(const struct cpumask *cpu_map)
7704{ 7703{
7705 return __build_sched_domains(cpu_map, NULL); 7704 return __build_sched_domains(cpu_map, NULL);
7706} 7705}
7707 7706
7708static cpumask_t *doms_cur; /* current sched domains */ 7707static struct cpumask *doms_cur; /* current sched domains */
7709static int ndoms_cur; /* number of sched domains in 'doms_cur' */ 7708static int ndoms_cur; /* number of sched domains in 'doms_cur' */
7710static struct sched_domain_attr *dattr_cur; 7709static struct sched_domain_attr *dattr_cur;
7711 /* attribues of custom domains in 'doms_cur' */ 7710 /* attribues of custom domains in 'doms_cur' */
7712 7711
7713/* 7712/*
7714 * Special case: If a kmalloc of a doms_cur partition (array of 7713 * Special case: If a kmalloc of a doms_cur partition (array of
7715 * cpumask_t) fails, then fallback to a single sched domain, 7714 * cpumask) fails, then fallback to a single sched domain,
7716 * as determined by the single cpumask_t fallback_doms. 7715 * as determined by the single cpumask fallback_doms.
7717 */ 7716 */
7718static cpumask_t fallback_doms; 7717static cpumask_var_t fallback_doms;
7719 7718
7720void __attribute__((weak)) arch_update_cpu_topology(void) 7719void __attribute__((weak)) arch_update_cpu_topology(void)
7721{ 7720{
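
Editor's note: the error path added above (the free_tmpmask / free_send_covered / ... / free_domainspan labels) replaces one bulk SCHED_CPUMASK_FREE with a reverse-order ladder of labels, so whichever allocation failed, only the masks obtained so far are released, and the success path sets err = 0 and falls through the same labels to drop its temporaries. A minimal standalone sketch of that goto-ladder idiom, using malloc'd buffers instead of cpumask_var_t (the names buf_a/buf_b/buf_c and build_something are invented for illustration):

        #include <errno.h>
        #include <stdlib.h>

        /*
         * Sketch of the reverse-order cleanup ladder: each successful
         * allocation gets a matching label, and a failure jumps to the
         * label that frees everything allocated *before* it.
         */
        static int build_something(size_t n)
        {
                char *buf_a, *buf_b, *buf_c;
                int err = -ENOMEM;

                buf_a = malloc(n);
                if (!buf_a)
                        goto out;
                buf_b = malloc(n);
                if (!buf_b)
                        goto free_a;
                buf_c = malloc(n);
                if (!buf_c)
                        goto free_b;

                /* ... the real function would build its result here ... */
                err = 0;        /* success: fall through, freeing the temporaries */

                free(buf_c);
        free_b:
                free(buf_b);
        free_a:
                free(buf_a);
        out:
                return err;
        }

        int main(void)
        {
                return build_something(64) ? EXIT_FAILURE : EXIT_SUCCESS;
        }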
@@ -7726,16 +7725,16 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
7726 * For now this just excludes isolated cpus, but could be used to 7725 * For now this just excludes isolated cpus, but could be used to
7727 * exclude other special cases in the future. 7726 * exclude other special cases in the future.
7728 */ 7727 */
7729static int arch_init_sched_domains(const cpumask_t *cpu_map) 7728static int arch_init_sched_domains(const struct cpumask *cpu_map)
7730{ 7729{
7731 int err; 7730 int err;
7732 7731
7733 arch_update_cpu_topology(); 7732 arch_update_cpu_topology();
7734 ndoms_cur = 1; 7733 ndoms_cur = 1;
7735 doms_cur = kmalloc(sizeof(cpumask_t), GFP_KERNEL); 7734 doms_cur = kmalloc(cpumask_size(), GFP_KERNEL);
7736 if (!doms_cur) 7735 if (!doms_cur)
7737 doms_cur = &fallback_doms; 7736 doms_cur = fallback_doms;
7738 cpus_andnot(*doms_cur, *cpu_map, cpu_isolated_map); 7737 cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map);
7739 dattr_cur = NULL; 7738 dattr_cur = NULL;
7740 err = build_sched_domains(doms_cur); 7739 err = build_sched_domains(doms_cur);
7741 register_sched_domain_sysctl(); 7740 register_sched_domain_sysctl();
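
Editor's note: arch_init_sched_domains() above keeps working even when kmalloc() fails by pointing doms_cur at the statically allocated fallback_doms, and the later "if (doms_cur != fallback_doms) kfree(doms_cur);" only frees the pointer when it really came from the allocator. A compilable sketch of that fallback idiom, with illustrative names (get_doms/put_doms, fallback, DOM_MASK_BYTES):

        #include <stdlib.h>
        #include <string.h>

        #define DOM_MASK_BYTES 128              /* stand-in for cpumask_size() */

        static char fallback[DOM_MASK_BYTES];   /* static fallback, like fallback_doms */

        /* Try to allocate a mask; fall back to static storage on failure. */
        static void *get_doms(void)
        {
                void *p = malloc(DOM_MASK_BYTES);

                if (!p)
                        p = fallback;
                memset(p, 0, DOM_MASK_BYTES);
                return p;
        }

        /* Free only what actually came from the allocator. */
        static void put_doms(void *p)
        {
                if (p != fallback)
                        free(p);
        }

        int main(void)
        {
                void *doms_cur = get_doms();

                /* ... use doms_cur as the single current partition ... */
                put_doms(doms_cur);
                return 0;
        }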
@@ -7743,8 +7742,8 @@ static int arch_init_sched_domains(const cpumask_t *cpu_map)
7743 return err; 7742 return err;
7744} 7743}
7745 7744
7746static void arch_destroy_sched_domains(const cpumask_t *cpu_map, 7745static void arch_destroy_sched_domains(const struct cpumask *cpu_map,
7747 cpumask_t *tmpmask) 7746 struct cpumask *tmpmask)
7748{ 7747{
7749 free_sched_groups(cpu_map, tmpmask); 7748 free_sched_groups(cpu_map, tmpmask);
7750} 7749}
@@ -7753,17 +7752,16 @@ static void arch_destroy_sched_domains(const cpumask_t *cpu_map,
7753 * Detach sched domains from a group of cpus specified in cpu_map 7752 * Detach sched domains from a group of cpus specified in cpu_map
7754 * These cpus will now be attached to the NULL domain 7753 * These cpus will now be attached to the NULL domain
7755 */ 7754 */
7756static void detach_destroy_domains(const cpumask_t *cpu_map) 7755static void detach_destroy_domains(const struct cpumask *cpu_map)
7757{ 7756{
7758 cpumask_t tmpmask; 7757 /* Safe because hotplug lock held. */
7758 static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
7759 int i; 7759 int i;
7760 7760
7761 unregister_sched_domain_sysctl(); 7761 for_each_cpu(i, cpu_map)
7762
7763 for_each_cpu_mask_nr(i, *cpu_map)
7764 cpu_attach_domain(NULL, &def_root_domain, i); 7762 cpu_attach_domain(NULL, &def_root_domain, i);
7765 synchronize_sched(); 7763 synchronize_sched();
7766 arch_destroy_sched_domains(cpu_map, &tmpmask); 7764 arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
7767} 7765}
7768 7766
7769/* handle null as "default" */ 7767/* handle null as "default" */
@@ -7788,7 +7786,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
7788 * doms_new[] to the current sched domain partitioning, doms_cur[]. 7786 * doms_new[] to the current sched domain partitioning, doms_cur[].
7789 * It destroys each deleted domain and builds each new domain. 7787 * It destroys each deleted domain and builds each new domain.
7790 * 7788 *
7791 * 'doms_new' is an array of cpumask_t's of length 'ndoms_new'. 7789 * 'doms_new' is an array of cpumask's of length 'ndoms_new'.
7792 * The masks don't intersect (don't overlap.) We should setup one 7790 * The masks don't intersect (don't overlap.) We should setup one
7793 * sched domain for each mask. CPUs not in any of the cpumasks will 7791 * sched domain for each mask. CPUs not in any of the cpumasks will
7794 * not be load balanced. If the same cpumask appears both in the 7792 * not be load balanced. If the same cpumask appears both in the
@@ -7802,13 +7800,14 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
7802 * the single partition 'fallback_doms', it also forces the domains 7800 * the single partition 'fallback_doms', it also forces the domains
7803 * to be rebuilt. 7801 * to be rebuilt.
7804 * 7802 *
7805 * If doms_new == NULL it will be replaced with cpu_online_map. 7803 * If doms_new == NULL it will be replaced with cpu_online_mask.
7806 * ndoms_new == 0 is a special case for destroying existing domains, 7804 * ndoms_new == 0 is a special case for destroying existing domains,
7807 * and it will not create the default domain. 7805 * and it will not create the default domain.
7808 * 7806 *
7809 * Call with hotplug lock held 7807 * Call with hotplug lock held
7810 */ 7808 */
7811void partition_sched_domains(int ndoms_new, cpumask_t *doms_new, 7809/* FIXME: Change to struct cpumask *doms_new[] */
7810void partition_sched_domains(int ndoms_new, struct cpumask *doms_new,
7812 struct sched_domain_attr *dattr_new) 7811 struct sched_domain_attr *dattr_new)
7813{ 7812{
7814 int i, j, n; 7813 int i, j, n;
@@ -7823,7 +7822,7 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
7823 /* Destroy deleted domains */ 7822 /* Destroy deleted domains */
7824 for (i = 0; i < ndoms_cur; i++) { 7823 for (i = 0; i < ndoms_cur; i++) {
7825 for (j = 0; j < n; j++) { 7824 for (j = 0; j < n; j++) {
7826 if (cpus_equal(doms_cur[i], doms_new[j]) 7825 if (cpumask_equal(&doms_cur[i], &doms_new[j])
7827 && dattrs_equal(dattr_cur, i, dattr_new, j)) 7826 && dattrs_equal(dattr_cur, i, dattr_new, j))
7828 goto match1; 7827 goto match1;
7829 } 7828 }
@@ -7835,15 +7834,15 @@ match1:
7835 7834
7836 if (doms_new == NULL) { 7835 if (doms_new == NULL) {
7837 ndoms_cur = 0; 7836 ndoms_cur = 0;
7838 doms_new = &fallback_doms; 7837 doms_new = fallback_doms;
7839 cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map); 7838 cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map);
7840 dattr_new = NULL; 7839 WARN_ON_ONCE(dattr_new);
7841 } 7840 }
7842 7841
7843 /* Build new domains */ 7842 /* Build new domains */
7844 for (i = 0; i < ndoms_new; i++) { 7843 for (i = 0; i < ndoms_new; i++) {
7845 for (j = 0; j < ndoms_cur; j++) { 7844 for (j = 0; j < ndoms_cur; j++) {
7846 if (cpus_equal(doms_new[i], doms_cur[j]) 7845 if (cpumask_equal(&doms_new[i], &doms_cur[j])
7847 && dattrs_equal(dattr_new, i, dattr_cur, j)) 7846 && dattrs_equal(dattr_new, i, dattr_cur, j))
7848 goto match2; 7847 goto match2;
7849 } 7848 }
@@ -7855,7 +7854,7 @@ match2:
7855 } 7854 }
7856 7855
7857 /* Remember the new sched domains */ 7856 /* Remember the new sched domains */
7858 if (doms_cur != &fallback_doms) 7857 if (doms_cur != fallback_doms)
7859 kfree(doms_cur); 7858 kfree(doms_cur);
7860 kfree(dattr_cur); /* kfree(NULL) is safe */ 7859 kfree(dattr_cur); /* kfree(NULL) is safe */
7861 doms_cur = doms_new; 7860 doms_cur = doms_new;
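
Editor's note: the doc comment and the match1/match2 loops above treat the update as a set difference: partitions present in doms_cur but not in doms_new are destroyed, and partitions present in doms_new but not in doms_cur are built. A standalone sketch of that comparison, using uint64_t bitmasks in place of struct cpumask and printf in place of the build/destroy calls (all names here are illustrative):

        #include <stdint.h>
        #include <stdio.h>

        /* Destroy partitions that disappeared, build partitions that are new. */
        static void repartition(const uint64_t *cur, int ncur,
                                const uint64_t *new, int nnew)
        {
                int i, j;

                for (i = 0; i < ncur; i++) {            /* destroy deleted domains */
                        for (j = 0; j < nnew; j++)
                                if (cur[i] == new[j])
                                        goto match1;
                        printf("destroy domain %#llx\n", (unsigned long long)cur[i]);
        match1:
                        ;
                }

                for (i = 0; i < nnew; i++) {            /* build new domains */
                        for (j = 0; j < ncur; j++)
                                if (new[i] == cur[j])
                                        goto match2;
                        printf("build domain %#llx\n", (unsigned long long)new[i]);
        match2:
                        ;
                }
        }

        int main(void)
        {
                uint64_t cur[] = { 0x0f, 0xf0 };        /* CPUs 0-3 and 4-7 */
                uint64_t new[] = { 0x0f, 0xff00 };      /* keep 0-3, add 8-15 */

                repartition(cur, 2, new, 2);
                return 0;
        }

Running it destroys the 0xf0 partition and builds 0xff00, while the unchanged 0x0f partition is left alone, which is exactly the behaviour the comment promises for matching cpumask/attribute pairs.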
@@ -7995,7 +7994,9 @@ static int update_runtime(struct notifier_block *nfb,
7995 7994
7996void __init sched_init_smp(void) 7995void __init sched_init_smp(void)
7997{ 7996{
7998 cpumask_t non_isolated_cpus; 7997 cpumask_var_t non_isolated_cpus;
7998
7999 alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
7999 8000
8000#if defined(CONFIG_NUMA) 8001#if defined(CONFIG_NUMA)
8001 sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), 8002 sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
@@ -8004,10 +8005,10 @@ void __init sched_init_smp(void)
8004#endif 8005#endif
8005 get_online_cpus(); 8006 get_online_cpus();
8006 mutex_lock(&sched_domains_mutex); 8007 mutex_lock(&sched_domains_mutex);
8007 arch_init_sched_domains(&cpu_online_map); 8008 arch_init_sched_domains(cpu_online_mask);
8008 cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map); 8009 cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
8009 if (cpus_empty(non_isolated_cpus)) 8010 if (cpumask_empty(non_isolated_cpus))
8010 cpu_set(smp_processor_id(), non_isolated_cpus); 8011 cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
8011 mutex_unlock(&sched_domains_mutex); 8012 mutex_unlock(&sched_domains_mutex);
8012 put_online_cpus(); 8013 put_online_cpus();
8013 8014
@@ -8022,9 +8023,13 @@ void __init sched_init_smp(void)
8022 init_hrtick(); 8023 init_hrtick();
8023 8024
8024 /* Move init over to a non-isolated CPU */ 8025 /* Move init over to a non-isolated CPU */
8025 if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0) 8026 if (set_cpus_allowed_ptr(current, non_isolated_cpus) < 0)
8026 BUG(); 8027 BUG();
8027 sched_init_granularity(); 8028 sched_init_granularity();
8029 free_cpumask_var(non_isolated_cpus);
8030
8031 alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
8032 init_sched_rt_class();
8028} 8033}
8029#else 8034#else
8030void __init sched_init_smp(void) 8035void __init sched_init_smp(void)
@@ -8339,6 +8344,15 @@ void __init sched_init(void)
8339 */ 8344 */
8340 current->sched_class = &fair_sched_class; 8345 current->sched_class = &fair_sched_class;
8341 8346
8347 /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
8348 alloc_bootmem_cpumask_var(&nohz_cpu_mask);
8349#ifdef CONFIG_SMP
8350#ifdef CONFIG_NO_HZ
8351 alloc_bootmem_cpumask_var(&nohz.cpu_mask);
8352#endif
8353 alloc_bootmem_cpumask_var(&cpu_isolated_map);
8354#endif /* SMP */
8355
8342 scheduler_running = 1; 8356 scheduler_running = 1;
8343} 8357}
8344 8358
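
Editor's note: the sched_init()/sched_init_smp() hunks above replace on-stack cpumask_t variables with cpumask_var_t, which is only a real allocation when CONFIG_CPUMASK_OFFSTACK is set; otherwise it remains an ordinary array and the alloc/free helpers are no-ops. A rough userspace sketch of that dual representation (the macro MASK_OFFSTACK and the helper names are invented for illustration):

        #include <stdbool.h>
        #include <stdlib.h>
        #include <string.h>

        #define NR_BITS 1024
        #define NR_WORDS (NR_BITS / (8 * sizeof(unsigned long)))

        #ifdef MASK_OFFSTACK
        /* Large masks: keep only a pointer on the stack, allocate the words. */
        typedef unsigned long *mask_var_t;

        static bool alloc_mask_var(mask_var_t *m)
        {
                *m = calloc(NR_WORDS, sizeof(unsigned long));
                return *m != NULL;
        }

        static void free_mask_var(mask_var_t m)
        {
                free(m);
        }
        #else
        /* Small masks: the variable *is* the storage, alloc/free do nothing. */
        typedef unsigned long mask_var_t[NR_WORDS];

        static bool alloc_mask_var(mask_var_t *m)
        {
                memset(*m, 0, sizeof(*m));
                return true;
        }

        static void free_mask_var(mask_var_t m)
        {
                (void)m;
        }
        #endif

        int main(void)
        {
                mask_var_t tmp;

                if (!alloc_mask_var(&tmp))
                        return 1;
                tmp[0] |= 1UL;          /* works with either representation */
                free_mask_var(tmp);
                return 0;
        }

Callers are written once against the alloc/use/free pattern, and the configuration decides whether any heap traffic actually happens.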
@@ -8497,7 +8511,7 @@ static
8497int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) 8511int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
8498{ 8512{
8499 struct cfs_rq *cfs_rq; 8513 struct cfs_rq *cfs_rq;
8500 struct sched_entity *se, *parent_se; 8514 struct sched_entity *se;
8501 struct rq *rq; 8515 struct rq *rq;
8502 int i; 8516 int i;
8503 8517
@@ -8513,18 +8527,17 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
8513 for_each_possible_cpu(i) { 8527 for_each_possible_cpu(i) {
8514 rq = cpu_rq(i); 8528 rq = cpu_rq(i);
8515 8529
8516 cfs_rq = kmalloc_node(sizeof(struct cfs_rq), 8530 cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
8517 GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); 8531 GFP_KERNEL, cpu_to_node(i));
8518 if (!cfs_rq) 8532 if (!cfs_rq)
8519 goto err; 8533 goto err;
8520 8534
8521 se = kmalloc_node(sizeof(struct sched_entity), 8535 se = kzalloc_node(sizeof(struct sched_entity),
8522 GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); 8536 GFP_KERNEL, cpu_to_node(i));
8523 if (!se) 8537 if (!se)
8524 goto err; 8538 goto err;
8525 8539
8526 parent_se = parent ? parent->se[i] : NULL; 8540 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]);
8527 init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent_se);
8528 } 8541 }
8529 8542
8530 return 1; 8543 return 1;
@@ -8585,7 +8598,7 @@ static
8585int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) 8598int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
8586{ 8599{
8587 struct rt_rq *rt_rq; 8600 struct rt_rq *rt_rq;
8588 struct sched_rt_entity *rt_se, *parent_se; 8601 struct sched_rt_entity *rt_se;
8589 struct rq *rq; 8602 struct rq *rq;
8590 int i; 8603 int i;
8591 8604
@@ -8602,18 +8615,17 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
8602 for_each_possible_cpu(i) { 8615 for_each_possible_cpu(i) {
8603 rq = cpu_rq(i); 8616 rq = cpu_rq(i);
8604 8617
8605 rt_rq = kmalloc_node(sizeof(struct rt_rq), 8618 rt_rq = kzalloc_node(sizeof(struct rt_rq),
8606 GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); 8619 GFP_KERNEL, cpu_to_node(i));
8607 if (!rt_rq) 8620 if (!rt_rq)
8608 goto err; 8621 goto err;
8609 8622
8610 rt_se = kmalloc_node(sizeof(struct sched_rt_entity), 8623 rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
8611 GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); 8624 GFP_KERNEL, cpu_to_node(i));
8612 if (!rt_se) 8625 if (!rt_se)
8613 goto err; 8626 goto err;
8614 8627
8615 parent_se = parent ? parent->rt_se[i] : NULL; 8628 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]);
8616 init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent_se);
8617 } 8629 }
8618 8630
8619 return 1; 8631 return 1;
@@ -9256,11 +9268,12 @@ struct cgroup_subsys cpu_cgroup_subsys = {
9256 * (balbir@in.ibm.com). 9268 * (balbir@in.ibm.com).
9257 */ 9269 */
9258 9270
9259/* track cpu usage of a group of tasks */ 9271/* track cpu usage of a group of tasks and its child groups */
9260struct cpuacct { 9272struct cpuacct {
9261 struct cgroup_subsys_state css; 9273 struct cgroup_subsys_state css;
9262 /* cpuusage holds pointer to a u64-type object on every cpu */ 9274 /* cpuusage holds pointer to a u64-type object on every cpu */
9263 u64 *cpuusage; 9275 u64 *cpuusage;
9276 struct cpuacct *parent;
9264}; 9277};
9265 9278
9266struct cgroup_subsys cpuacct_subsys; 9279struct cgroup_subsys cpuacct_subsys;
@@ -9294,6 +9307,9 @@ static struct cgroup_subsys_state *cpuacct_create(
9294 return ERR_PTR(-ENOMEM); 9307 return ERR_PTR(-ENOMEM);
9295 } 9308 }
9296 9309
9310 if (cgrp->parent)
9311 ca->parent = cgroup_ca(cgrp->parent);
9312
9297 return &ca->css; 9313 return &ca->css;
9298} 9314}
9299 9315
@@ -9373,14 +9389,16 @@ static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
9373static void cpuacct_charge(struct task_struct *tsk, u64 cputime) 9389static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
9374{ 9390{
9375 struct cpuacct *ca; 9391 struct cpuacct *ca;
9392 int cpu;
9376 9393
9377 if (!cpuacct_subsys.active) 9394 if (!cpuacct_subsys.active)
9378 return; 9395 return;
9379 9396
9397 cpu = task_cpu(tsk);
9380 ca = task_ca(tsk); 9398 ca = task_ca(tsk);
9381 if (ca) {
9382 u64 *cpuusage = percpu_ptr(ca->cpuusage, task_cpu(tsk));
9383 9399
9400 for (; ca; ca = ca->parent) {
9401 u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu);
9384 *cpuusage += cputime; 9402 *cpuusage += cputime;
9385 } 9403 }
9386} 9404}
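
Editor's note: with the parent pointer added above, cpuacct_charge() walks from the task's group up to the root and adds the same delta to every ancestor's per-cpu counter, so a group's cpuusage also reflects time charged to its descendants. A small standalone model of that walk (the struct name, the fixed NCPU, and the plain array standing in for the per-cpu pointer are all illustrative):

        #include <stdio.h>

        #define NCPU 4

        struct acct_group {
                struct acct_group *parent;
                unsigned long long usage[NCPU]; /* stands in for the per-cpu counter */
        };

        /* Charge 'delta' ns of CPU time on 'cpu' to the group and all ancestors. */
        static void charge(struct acct_group *ca, int cpu, unsigned long long delta)
        {
                for (; ca; ca = ca->parent)
                        ca->usage[cpu] += delta;
        }

        int main(void)
        {
                struct acct_group root = { 0 };
                struct acct_group g1 = { .parent = &root };
                struct acct_group leaf = { .parent = &g1 };

                charge(&leaf, 0, 1000); /* task in the leaf group runs 1000ns */
                charge(&g1, 1, 500);    /* task directly in g1 runs 500ns */

                printf("root: %llu\n", root.usage[0] + root.usage[1]);  /* 1500 */
                printf("g1:   %llu\n", g1.usage[0] + g1.usage[1]);      /* 1500 */
                printf("leaf: %llu\n", leaf.usage[0] + leaf.usage[1]);  /* 1000 */
                return 0;
        }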
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 52154fefab7e..018b7be1db2e 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -67,24 +67,21 @@ static int convert_prio(int prio)
67 * Returns: (int)bool - CPUs were found 67 * Returns: (int)bool - CPUs were found
68 */ 68 */
69int cpupri_find(struct cpupri *cp, struct task_struct *p, 69int cpupri_find(struct cpupri *cp, struct task_struct *p,
70 cpumask_t *lowest_mask) 70 struct cpumask *lowest_mask)
71{ 71{
72 int idx = 0; 72 int idx = 0;
73 int task_pri = convert_prio(p->prio); 73 int task_pri = convert_prio(p->prio);
74 74
75 for_each_cpupri_active(cp->pri_active, idx) { 75 for_each_cpupri_active(cp->pri_active, idx) {
76 struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; 76 struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
77 cpumask_t mask;
78 77
79 if (idx >= task_pri) 78 if (idx >= task_pri)
80 break; 79 break;
81 80
82 cpus_and(mask, p->cpus_allowed, vec->mask); 81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
83
84 if (cpus_empty(mask))
85 continue; 82 continue;
86 83
87 *lowest_mask = mask; 84 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
88 return 1; 85 return 1;
89 } 86 }
90 87
@@ -126,7 +123,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
126 vec->count--; 123 vec->count--;
127 if (!vec->count) 124 if (!vec->count)
128 clear_bit(oldpri, cp->pri_active); 125 clear_bit(oldpri, cp->pri_active);
129 cpu_clear(cpu, vec->mask); 126 cpumask_clear_cpu(cpu, vec->mask);
130 127
131 spin_unlock_irqrestore(&vec->lock, flags); 128 spin_unlock_irqrestore(&vec->lock, flags);
132 } 129 }
@@ -136,7 +133,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
136 133
137 spin_lock_irqsave(&vec->lock, flags); 134 spin_lock_irqsave(&vec->lock, flags);
138 135
139 cpu_set(cpu, vec->mask); 136 cpumask_set_cpu(cpu, vec->mask);
140 vec->count++; 137 vec->count++;
141 if (vec->count == 1) 138 if (vec->count == 1)
142 set_bit(newpri, cp->pri_active); 139 set_bit(newpri, cp->pri_active);
@@ -150,10 +147,11 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
150/** 147/**
151 * cpupri_init - initialize the cpupri structure 148 * cpupri_init - initialize the cpupri structure
152 * @cp: The cpupri context 149 * @cp: The cpupri context
150 * @bootmem: true if allocations need to use bootmem
153 * 151 *
154 * Returns: (void) 152 * Returns: -ENOMEM if memory allocation fails.
155 */ 153 */
156void cpupri_init(struct cpupri *cp) 154int cpupri_init(struct cpupri *cp, bool bootmem)
157{ 155{
158 int i; 156 int i;
159 157
@@ -164,11 +162,30 @@ void cpupri_init(struct cpupri *cp)
164 162
165 spin_lock_init(&vec->lock); 163 spin_lock_init(&vec->lock);
166 vec->count = 0; 164 vec->count = 0;
167 cpus_clear(vec->mask); 165 if (bootmem)
166 alloc_bootmem_cpumask_var(&vec->mask);
167 else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL))
168 goto cleanup;
168 } 169 }
169 170
170 for_each_possible_cpu(i) 171 for_each_possible_cpu(i)
171 cp->cpu_to_pri[i] = CPUPRI_INVALID; 172 cp->cpu_to_pri[i] = CPUPRI_INVALID;
173 return 0;
174
175cleanup:
176 for (i--; i >= 0; i--)
177 free_cpumask_var(cp->pri_to_cpu[i].mask);
178 return -ENOMEM;
172} 179}
173 180
181/**
182 * cpupri_cleanup - clean up the cpupri structure
183 * @cp: The cpupri context
184 */
185void cpupri_cleanup(struct cpupri *cp)
186{
187 int i;
174 188
189 for (i = 0; i < CPUPRI_NR_PRIORITIES; i++)
190 free_cpumask_var(cp->pri_to_cpu[i].mask);
191}
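
Editor's note: cpupri_init() above allocates one mask per priority vector and, if an allocation fails partway through, counts i back down to free exactly the masks already obtained before returning -ENOMEM; the companion cpupri_cleanup() frees them all. A compilable sketch of that "unwind what you managed to allocate" loop, with invented names (vec_init/vec_destroy, NVEC):

        #include <errno.h>
        #include <stdlib.h>

        #define NVEC 16         /* illustrative; cpupri keeps one vector per priority */

        struct vec {
                unsigned long *mask;
        };

        static struct vec vecs[NVEC];

        static int vec_init(size_t mask_words)
        {
                int i;

                for (i = 0; i < NVEC; i++) {
                        vecs[i].mask = calloc(mask_words, sizeof(unsigned long));
                        if (!vecs[i].mask)
                                goto cleanup;
                }
                return 0;

        cleanup:
                for (i--; i >= 0; i--)  /* free only what was allocated */
                        free(vecs[i].mask);
                return -ENOMEM;
        }

        static void vec_destroy(void)
        {
                int i;

                for (i = 0; i < NVEC; i++)
                        free(vecs[i].mask);
        }

        int main(void)
        {
                if (vec_init(16))
                        return 1;
                vec_destroy();
                return 0;
        }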
diff --git a/kernel/sched_cpupri.h b/kernel/sched_cpupri.h
index f25811b0f931..642a94ef8a0a 100644
--- a/kernel/sched_cpupri.h
+++ b/kernel/sched_cpupri.h
@@ -14,7 +14,7 @@
14struct cpupri_vec { 14struct cpupri_vec {
15 spinlock_t lock; 15 spinlock_t lock;
16 int count; 16 int count;
17 cpumask_t mask; 17 cpumask_var_t mask;
18}; 18};
19 19
20struct cpupri { 20struct cpupri {
@@ -27,7 +27,8 @@ struct cpupri {
27int cpupri_find(struct cpupri *cp, 27int cpupri_find(struct cpupri *cp,
28 struct task_struct *p, cpumask_t *lowest_mask); 28 struct task_struct *p, cpumask_t *lowest_mask);
29void cpupri_set(struct cpupri *cp, int cpu, int pri); 29void cpupri_set(struct cpupri *cp, int cpu, int pri);
30void cpupri_init(struct cpupri *cp); 30int cpupri_init(struct cpupri *cp, bool bootmem);
31void cpupri_cleanup(struct cpupri *cp);
31#else 32#else
32#define cpupri_set(cp, cpu, pri) do { } while (0) 33#define cpupri_set(cp, cpu, pri) do { } while (0)
33#define cpupri_init() do { } while (0) 34#define cpupri_init() do { } while (0)
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 26ed8e3d1c15..baf2f17af462 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -53,6 +53,40 @@ static unsigned long nsec_low(unsigned long long nsec)
53 53
54#define SPLIT_NS(x) nsec_high(x), nsec_low(x) 54#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
55 55
56#ifdef CONFIG_FAIR_GROUP_SCHED
57static void print_cfs_group_stats(struct seq_file *m, int cpu,
58 struct task_group *tg)
59{
60 struct sched_entity *se = tg->se[cpu];
61 if (!se)
62 return;
63
64#define P(F) \
65 SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
66#define PN(F) \
67 SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
68
69 PN(se->exec_start);
70 PN(se->vruntime);
71 PN(se->sum_exec_runtime);
72#ifdef CONFIG_SCHEDSTATS
73 PN(se->wait_start);
74 PN(se->sleep_start);
75 PN(se->block_start);
76 PN(se->sleep_max);
77 PN(se->block_max);
78 PN(se->exec_max);
79 PN(se->slice_max);
80 PN(se->wait_max);
81 PN(se->wait_sum);
82 P(se->wait_count);
83#endif
84 P(se->load.weight);
85#undef PN
86#undef P
87}
88#endif
89
56static void 90static void
57print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) 91print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
58{ 92{
@@ -121,14 +155,9 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
121 155
122#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED) 156#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_FAIR_GROUP_SCHED)
123 char path[128] = ""; 157 char path[128] = "";
124 struct cgroup *cgroup = NULL;
125 struct task_group *tg = cfs_rq->tg; 158 struct task_group *tg = cfs_rq->tg;
126 159
127 if (tg) 160 cgroup_path(tg->css.cgroup, path, sizeof(path));
128 cgroup = tg->css.cgroup;
129
130 if (cgroup)
131 cgroup_path(cgroup, path, sizeof(path));
132 161
133 SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path); 162 SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, path);
134#else 163#else
@@ -168,6 +197,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
168#ifdef CONFIG_SMP 197#ifdef CONFIG_SMP
169 SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares); 198 SEQ_printf(m, " .%-30s: %lu\n", "shares", cfs_rq->shares);
170#endif 199#endif
200 print_cfs_group_stats(m, cpu, cfs_rq->tg);
171#endif 201#endif
172} 202}
173 203
@@ -175,14 +205,9 @@ void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
175{ 205{
176#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED) 206#if defined(CONFIG_CGROUP_SCHED) && defined(CONFIG_RT_GROUP_SCHED)
177 char path[128] = ""; 207 char path[128] = "";
178 struct cgroup *cgroup = NULL;
179 struct task_group *tg = rt_rq->tg; 208 struct task_group *tg = rt_rq->tg;
180 209
181 if (tg) 210 cgroup_path(tg->css.cgroup, path, sizeof(path));
182 cgroup = tg->css.cgroup;
183
184 if (cgroup)
185 cgroup_path(cgroup, path, sizeof(path));
186 211
187 SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path); 212 SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, path);
188#else 213#else
@@ -272,7 +297,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
272 u64 now = ktime_to_ns(ktime_get()); 297 u64 now = ktime_to_ns(ktime_get());
273 int cpu; 298 int cpu;
274 299
275 SEQ_printf(m, "Sched Debug Version: v0.07, %s %.*s\n", 300 SEQ_printf(m, "Sched Debug Version: v0.08, %s %.*s\n",
276 init_utsname()->release, 301 init_utsname()->release,
277 (int)strcspn(init_utsname()->version, " "), 302 (int)strcspn(init_utsname()->version, " "),
278 init_utsname()->version); 303 init_utsname()->version);
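
Editor's note: print_cfs_group_stats() above defines local P()/PN() macros that stringize the expression with #F, so every field prints under its own name without repeating it by hand. Roughly, in plain C with printf standing in for SEQ_printf and a simplified nanosecond split (the struct and field names are made up):

        #include <stdio.h>

        struct stats {
                long long exec_start;
                long long vruntime;
                long long weight;
        };

        /* Print an integer field, using the expression itself as the label. */
        #define P(F) \
                printf("  .%-30s: %lld\n", #F, (long long)(F))
        /* Print a nanosecond value as seconds.microseconds. */
        #define PN(F) \
                printf("  .%-30s: %lld.%06lld\n", #F, \
                       (long long)(F) / 1000000000LL, \
                       ((long long)(F) % 1000000000LL) / 1000LL)

        int main(void)
        {
                struct stats st = { 1234567890LL, 42000000LL, 1024 };

                PN(st.exec_start);
                PN(st.vruntime);
                P(st.weight);
                return 0;
        }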
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 98345e45b059..08ffffd4a410 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1017,14 +1017,13 @@ static void yield_task_fair(struct rq *rq)
1017 * search starts with cpus closest then further out as needed, 1017 * search starts with cpus closest then further out as needed,
1018 * so we always favor a closer, idle cpu. 1018 * so we always favor a closer, idle cpu.
1019 * Domains may include CPUs that are not usable for migration, 1019 * Domains may include CPUs that are not usable for migration,
1020 * hence we need to mask them out (cpu_active_map) 1020 * hence we need to mask them out (cpu_active_mask)
1021 * 1021 *
1022 * Returns the CPU we should wake onto. 1022 * Returns the CPU we should wake onto.
1023 */ 1023 */
1024#if defined(ARCH_HAS_SCHED_WAKE_IDLE) 1024#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
1025static int wake_idle(int cpu, struct task_struct *p) 1025static int wake_idle(int cpu, struct task_struct *p)
1026{ 1026{
1027 cpumask_t tmp;
1028 struct sched_domain *sd; 1027 struct sched_domain *sd;
1029 int i; 1028 int i;
1030 1029
@@ -1044,10 +1043,9 @@ static int wake_idle(int cpu, struct task_struct *p)
1044 if ((sd->flags & SD_WAKE_IDLE) 1043 if ((sd->flags & SD_WAKE_IDLE)
1045 || ((sd->flags & SD_WAKE_IDLE_FAR) 1044 || ((sd->flags & SD_WAKE_IDLE_FAR)
1046 && !task_hot(p, task_rq(p)->clock, sd))) { 1045 && !task_hot(p, task_rq(p)->clock, sd))) {
1047 cpus_and(tmp, sd->span, p->cpus_allowed); 1046 for_each_cpu_and(i, sched_domain_span(sd),
1048 cpus_and(tmp, tmp, cpu_active_map); 1047 &p->cpus_allowed) {
1049 for_each_cpu_mask_nr(i, tmp) { 1048 if (cpu_active(i) && idle_cpu(i)) {
1050 if (idle_cpu(i)) {
1051 if (i != task_cpu(p)) { 1049 if (i != task_cpu(p)) {
1052 schedstat_inc(p, 1050 schedstat_inc(p,
1053 se.nr_wakeups_idle); 1051 se.nr_wakeups_idle);
@@ -1240,13 +1238,13 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
1240 * this_cpu and prev_cpu are present in: 1238 * this_cpu and prev_cpu are present in:
1241 */ 1239 */
1242 for_each_domain(this_cpu, sd) { 1240 for_each_domain(this_cpu, sd) {
1243 if (cpu_isset(prev_cpu, sd->span)) { 1241 if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
1244 this_sd = sd; 1242 this_sd = sd;
1245 break; 1243 break;
1246 } 1244 }
1247 } 1245 }
1248 1246
1249 if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) 1247 if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
1250 goto out; 1248 goto out;
1251 1249
1252 /* 1250 /*
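
Editor's note: the wake_idle() hunk above drops the on-stack scratch mask: instead of AND-ing the domain span, the task's allowed CPUs and cpu_active_map into a temporary cpumask_t, it walks the intersection directly and tests cpu_active()/idle_cpu() per CPU. The shape of that search is easy to model with word-sized bitmasks; the helper names below (first_idle_cpu, is_idle) are illustrative:

        #include <stdint.h>
        #include <stdio.h>

        #define NR_CPUS 16

        /* Per-CPU idle state for the example. */
        static int cpu_idle_state[NR_CPUS] = { [3] = 1, [7] = 1 };

        static int is_idle(int cpu)
        {
                return cpu_idle_state[cpu];
        }

        /*
         * Walk the intersection of three masks without building a temporary:
         * domain span, the task's allowed CPUs, and the active map.
         */
        static int first_idle_cpu(uint32_t span, uint32_t allowed, uint32_t active)
        {
                for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                        if (!(span & allowed & active & (1u << cpu)))
                                continue;
                        if (is_idle(cpu))
                                return cpu;
                }
                return -1;
        }

        int main(void)
        {
                uint32_t span = 0x00ff;         /* domain covers CPUs 0-7 */
                uint32_t allowed = 0x00f0;      /* task may run on CPUs 4-7 */
                uint32_t active = 0xffff;       /* all CPUs online */

                printf("wake onto CPU %d\n", first_idle_cpu(span, allowed, active));
                return 0;
        }

Here CPU 3 is idle but not allowed and CPUs 4-6 are busy, so the search settles on CPU 7, the first idle CPU in the intersection.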
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d9ba9d5f99d6..94aab72f6a02 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -15,7 +15,7 @@ static inline void rt_set_overload(struct rq *rq)
15 if (!rq->online) 15 if (!rq->online)
16 return; 16 return;
17 17
18 cpu_set(rq->cpu, rq->rd->rto_mask); 18 cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
19 /* 19 /*
20 * Make sure the mask is visible before we set 20 * Make sure the mask is visible before we set
21 * the overload count. That is checked to determine 21 * the overload count. That is checked to determine
@@ -34,7 +34,7 @@ static inline void rt_clear_overload(struct rq *rq)
34 34
35 /* the order here really doesn't matter */ 35 /* the order here really doesn't matter */
36 atomic_dec(&rq->rd->rto_count); 36 atomic_dec(&rq->rd->rto_count);
37 cpu_clear(rq->cpu, rq->rd->rto_mask); 37 cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
38} 38}
39 39
40static void update_rt_migration(struct rq *rq) 40static void update_rt_migration(struct rq *rq)
@@ -139,14 +139,14 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
139} 139}
140 140
141#ifdef CONFIG_SMP 141#ifdef CONFIG_SMP
142static inline cpumask_t sched_rt_period_mask(void) 142static inline const struct cpumask *sched_rt_period_mask(void)
143{ 143{
144 return cpu_rq(smp_processor_id())->rd->span; 144 return cpu_rq(smp_processor_id())->rd->span;
145} 145}
146#else 146#else
147static inline cpumask_t sched_rt_period_mask(void) 147static inline const struct cpumask *sched_rt_period_mask(void)
148{ 148{
149 return cpu_online_map; 149 return cpu_online_mask;
150} 150}
151#endif 151#endif
152 152
@@ -212,9 +212,9 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq)
212 return rt_rq->rt_throttled; 212 return rt_rq->rt_throttled;
213} 213}
214 214
215static inline cpumask_t sched_rt_period_mask(void) 215static inline const struct cpumask *sched_rt_period_mask(void)
216{ 216{
217 return cpu_online_map; 217 return cpu_online_mask;
218} 218}
219 219
220static inline 220static inline
@@ -241,11 +241,11 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
241 int i, weight, more = 0; 241 int i, weight, more = 0;
242 u64 rt_period; 242 u64 rt_period;
243 243
244 weight = cpus_weight(rd->span); 244 weight = cpumask_weight(rd->span);
245 245
246 spin_lock(&rt_b->rt_runtime_lock); 246 spin_lock(&rt_b->rt_runtime_lock);
247 rt_period = ktime_to_ns(rt_b->rt_period); 247 rt_period = ktime_to_ns(rt_b->rt_period);
248 for_each_cpu_mask_nr(i, rd->span) { 248 for_each_cpu(i, rd->span) {
249 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 249 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
250 s64 diff; 250 s64 diff;
251 251
@@ -324,7 +324,7 @@ static void __disable_runtime(struct rq *rq)
324 /* 324 /*
325 * Greedy reclaim, take back as much as we can. 325 * Greedy reclaim, take back as much as we can.
326 */ 326 */
327 for_each_cpu_mask(i, rd->span) { 327 for_each_cpu(i, rd->span) {
328 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 328 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
329 s64 diff; 329 s64 diff;
330 330
@@ -429,13 +429,13 @@ static inline int balance_runtime(struct rt_rq *rt_rq)
429static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) 429static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
430{ 430{
431 int i, idle = 1; 431 int i, idle = 1;
432 cpumask_t span; 432 const struct cpumask *span;
433 433
434 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) 434 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
435 return 1; 435 return 1;
436 436
437 span = sched_rt_period_mask(); 437 span = sched_rt_period_mask();
438 for_each_cpu_mask(i, span) { 438 for_each_cpu(i, span) {
439 int enqueue = 0; 439 int enqueue = 0;
440 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); 440 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
441 struct rq *rq = rq_of_rt_rq(rt_rq); 441 struct rq *rq = rq_of_rt_rq(rt_rq);
@@ -537,13 +537,13 @@ static void update_curr_rt(struct rq *rq)
537 for_each_sched_rt_entity(rt_se) { 537 for_each_sched_rt_entity(rt_se) {
538 rt_rq = rt_rq_of_se(rt_se); 538 rt_rq = rt_rq_of_se(rt_se);
539 539
540 spin_lock(&rt_rq->rt_runtime_lock);
541 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { 540 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
541 spin_lock(&rt_rq->rt_runtime_lock);
542 rt_rq->rt_time += delta_exec; 542 rt_rq->rt_time += delta_exec;
543 if (sched_rt_runtime_exceeded(rt_rq)) 543 if (sched_rt_runtime_exceeded(rt_rq))
544 resched_task(curr); 544 resched_task(curr);
545 spin_unlock(&rt_rq->rt_runtime_lock);
545 } 546 }
546 spin_unlock(&rt_rq->rt_runtime_lock);
547 } 547 }
548} 548}
549 549
@@ -805,17 +805,20 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
805 805
806static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 806static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
807{ 807{
808 cpumask_t mask; 808 cpumask_var_t mask;
809 809
810 if (rq->curr->rt.nr_cpus_allowed == 1) 810 if (rq->curr->rt.nr_cpus_allowed == 1)
811 return; 811 return;
812 812
813 if (p->rt.nr_cpus_allowed != 1 813 if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
814 && cpupri_find(&rq->rd->cpupri, p, &mask))
815 return; 814 return;
816 815
817 if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) 816 if (p->rt.nr_cpus_allowed != 1
818 return; 817 && cpupri_find(&rq->rd->cpupri, p, mask))
818 goto free;
819
820 if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask))
821 goto free;
819 822
820 /* 823 /*
821 * There appears to be other cpus that can accept 824 * There appears to be other cpus that can accept
@@ -824,6 +827,8 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
824 */ 827 */
825 requeue_task_rt(rq, p, 1); 828 requeue_task_rt(rq, p, 1);
826 resched_task(rq->curr); 829 resched_task(rq->curr);
830free:
831 free_cpumask_var(mask);
827} 832}
828 833
829#endif /* CONFIG_SMP */ 834#endif /* CONFIG_SMP */
@@ -910,14 +915,15 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
910#define RT_MAX_TRIES 3 915#define RT_MAX_TRIES 3
911 916
912static int double_lock_balance(struct rq *this_rq, struct rq *busiest); 917static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
913static void double_unlock_balance(struct rq *this_rq, struct rq *busiest); 918static inline void double_unlock_balance(struct rq *this_rq,
919 struct rq *busiest);
914 920
915static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); 921static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
916 922
917static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) 923static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
918{ 924{
919 if (!task_running(rq, p) && 925 if (!task_running(rq, p) &&
920 (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) && 926 (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
921 (p->rt.nr_cpus_allowed > 1)) 927 (p->rt.nr_cpus_allowed > 1))
922 return 1; 928 return 1;
923 return 0; 929 return 0;
@@ -956,7 +962,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
956 return next; 962 return next;
957} 963}
958 964
959static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); 965static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
960 966
961static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) 967static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
962{ 968{
@@ -976,7 +982,7 @@ static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
976static int find_lowest_rq(struct task_struct *task) 982static int find_lowest_rq(struct task_struct *task)
977{ 983{
978 struct sched_domain *sd; 984 struct sched_domain *sd;
979 cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); 985 struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask);
980 int this_cpu = smp_processor_id(); 986 int this_cpu = smp_processor_id();
981 int cpu = task_cpu(task); 987 int cpu = task_cpu(task);
982 988
@@ -991,7 +997,7 @@ static int find_lowest_rq(struct task_struct *task)
991 * I guess we might want to change cpupri_find() to ignore those 997 * I guess we might want to change cpupri_find() to ignore those
992 * in the first place. 998 * in the first place.
993 */ 999 */
994 cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); 1000 cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
995 1001
996 /* 1002 /*
997 * At this point we have built a mask of cpus representing the 1003 * At this point we have built a mask of cpus representing the
@@ -1001,7 +1007,7 @@ static int find_lowest_rq(struct task_struct *task)
1001 * We prioritize the last cpu that the task executed on since 1007 * We prioritize the last cpu that the task executed on since
1002 * it is most likely cache-hot in that location. 1008 * it is most likely cache-hot in that location.
1003 */ 1009 */
1004 if (cpu_isset(cpu, *lowest_mask)) 1010 if (cpumask_test_cpu(cpu, lowest_mask))
1005 return cpu; 1011 return cpu;
1006 1012
1007 /* 1013 /*
@@ -1016,7 +1022,8 @@ static int find_lowest_rq(struct task_struct *task)
1016 cpumask_t domain_mask; 1022 cpumask_t domain_mask;
1017 int best_cpu; 1023 int best_cpu;
1018 1024
1019 cpus_and(domain_mask, sd->span, *lowest_mask); 1025 cpumask_and(&domain_mask, sched_domain_span(sd),
1026 lowest_mask);
1020 1027
1021 best_cpu = pick_optimal_cpu(this_cpu, 1028 best_cpu = pick_optimal_cpu(this_cpu,
1022 &domain_mask); 1029 &domain_mask);
@@ -1057,8 +1064,8 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1057 * Also make sure that it wasn't scheduled on its rq. 1064 * Also make sure that it wasn't scheduled on its rq.
1058 */ 1065 */
1059 if (unlikely(task_rq(task) != rq || 1066 if (unlikely(task_rq(task) != rq ||
1060 !cpu_isset(lowest_rq->cpu, 1067 !cpumask_test_cpu(lowest_rq->cpu,
1061 task->cpus_allowed) || 1068 &task->cpus_allowed) ||
1062 task_running(rq, task) || 1069 task_running(rq, task) ||
1063 !task->se.on_rq)) { 1070 !task->se.on_rq)) {
1064 1071
@@ -1179,7 +1186,7 @@ static int pull_rt_task(struct rq *this_rq)
1179 1186
1180 next = pick_next_task_rt(this_rq); 1187 next = pick_next_task_rt(this_rq);
1181 1188
1182 for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { 1189 for_each_cpu(cpu, this_rq->rd->rto_mask) {
1183 if (this_cpu == cpu) 1190 if (this_cpu == cpu)
1184 continue; 1191 continue;
1185 1192
@@ -1308,9 +1315,9 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
1308} 1315}
1309 1316
1310static void set_cpus_allowed_rt(struct task_struct *p, 1317static void set_cpus_allowed_rt(struct task_struct *p,
1311 const cpumask_t *new_mask) 1318 const struct cpumask *new_mask)
1312{ 1319{
1313 int weight = cpus_weight(*new_mask); 1320 int weight = cpumask_weight(new_mask);
1314 1321
1315 BUG_ON(!rt_task(p)); 1322 BUG_ON(!rt_task(p));
1316 1323
@@ -1331,7 +1338,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
1331 update_rt_migration(rq); 1338 update_rt_migration(rq);
1332 } 1339 }
1333 1340
1334 p->cpus_allowed = *new_mask; 1341 cpumask_copy(&p->cpus_allowed, new_mask);
1335 p->rt.nr_cpus_allowed = weight; 1342 p->rt.nr_cpus_allowed = weight;
1336} 1343}
1337 1344
@@ -1374,6 +1381,14 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p,
1374 if (!rq->rt.rt_nr_running) 1381 if (!rq->rt.rt_nr_running)
1375 pull_rt_task(rq); 1382 pull_rt_task(rq);
1376} 1383}
1384
1385static inline void init_sched_rt_class(void)
1386{
1387 unsigned int i;
1388
1389 for_each_possible_cpu(i)
1390 alloc_cpumask_var(&per_cpu(local_cpu_mask, i), GFP_KERNEL);
1391}
1377#endif /* CONFIG_SMP */ 1392#endif /* CONFIG_SMP */
1378 1393
1379/* 1394/*
@@ -1544,3 +1559,4 @@ static void print_rt_stats(struct seq_file *m, int cpu)
1544 rcu_read_unlock(); 1559 rcu_read_unlock();
1545} 1560}
1546#endif /* CONFIG_SCHED_DEBUG */ 1561#endif /* CONFIG_SCHED_DEBUG */
1562
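
Editor's note: check_preempt_equal_prio() above now has to allocate its scratch mask (it may live off-stack), bail out quietly if the GFP_ATOMIC allocation fails, and funnel every exit through a single free: label. The same shape in standalone C, with the actual scheduling decision replaced by a placeholder predicate (all names invented):

        #include <stdlib.h>
        #include <string.h>

        #define MASK_BYTES 128

        /* Placeholder for cpupri_find(): pretend it fills the mask and succeeds. */
        static int find_lower_prio_cpus(unsigned char *mask)
        {
                memset(mask, 0xff, MASK_BYTES);
                return 1;
        }

        static void check_preempt(void)
        {
                unsigned char *mask = malloc(MASK_BYTES);       /* scratch mask */

                if (!mask)
                        return;         /* allocation failed: skip the optimisation */

                if (!find_lower_prio_cpus(mask))
                        goto free;

                /* ... requeue/resched decision would go here ... */

        free:
                free(mask);
        }

        int main(void)
        {
                check_preempt();
                return 0;
        }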
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 7dbf72a2b02c..ce340835d055 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -42,7 +42,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
42 for_each_domain(cpu, sd) { 42 for_each_domain(cpu, sd) {
43 enum cpu_idle_type itype; 43 enum cpu_idle_type itype;
44 44
45 cpumask_scnprintf(mask_str, mask_len, sd->span); 45 cpumask_scnprintf(mask_str, mask_len,
46 *sched_domain_span(sd));
46 seq_printf(seq, "domain%d %s", dcount++, mask_str); 47 seq_printf(seq, "domain%d %s", dcount++, mask_str);
47 for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; 48 for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES;
48 itype++) { 49 itype++) {
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 3953e4aed733..884e6cd2769c 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -164,7 +164,7 @@ unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
164/* 164/*
165 * Zero means infinite timeout - no checking done: 165 * Zero means infinite timeout - no checking done:
166 */ 166 */
167unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; 167unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
168 168
169unsigned long __read_mostly sysctl_hung_task_warnings = 10; 169unsigned long __read_mostly sysctl_hung_task_warnings = 10;
170 170
diff --git a/kernel/sys.c b/kernel/sys.c
index 31deba8f7d16..5fc3a0cfb994 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -858,8 +858,8 @@ void do_sys_times(struct tms *tms)
858 struct task_cputime cputime; 858 struct task_cputime cputime;
859 cputime_t cutime, cstime; 859 cputime_t cutime, cstime;
860 860
861 spin_lock_irq(&current->sighand->siglock);
862 thread_group_cputime(current, &cputime); 861 thread_group_cputime(current, &cputime);
862 spin_lock_irq(&current->sighand->siglock);
863 cutime = current->signal->cutime; 863 cutime = current->signal->cutime;
864 cstime = current->signal->cstime; 864 cstime = current->signal->cstime;
865 spin_unlock_irq(&current->sighand->siglock); 865 spin_unlock_irq(&current->sighand->siglock);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 342fc9ccab46..70f872c71f4e 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -144,7 +144,7 @@ void tick_nohz_update_jiffies(void)
144 if (!ts->tick_stopped) 144 if (!ts->tick_stopped)
145 return; 145 return;
146 146
147 cpu_clear(cpu, nohz_cpu_mask); 147 cpumask_clear_cpu(cpu, nohz_cpu_mask);
148 now = ktime_get(); 148 now = ktime_get();
149 ts->idle_waketime = now; 149 ts->idle_waketime = now;
150 150
@@ -283,7 +283,7 @@ void tick_nohz_stop_sched_tick(int inidle)
283 if ((long)delta_jiffies >= 1) { 283 if ((long)delta_jiffies >= 1) {
284 284
285 if (delta_jiffies > 1) 285 if (delta_jiffies > 1)
286 cpu_set(cpu, nohz_cpu_mask); 286 cpumask_set_cpu(cpu, nohz_cpu_mask);
287 /* 287 /*
288 * nohz_stop_sched_tick can be called several times before 288 * nohz_stop_sched_tick can be called several times before
289 * the nohz_restart_sched_tick is called. This happens when 289 * the nohz_restart_sched_tick is called. This happens when
@@ -296,7 +296,7 @@ void tick_nohz_stop_sched_tick(int inidle)
296 /* 296 /*
297 * sched tick not stopped! 297 * sched tick not stopped!
298 */ 298 */
299 cpu_clear(cpu, nohz_cpu_mask); 299 cpumask_clear_cpu(cpu, nohz_cpu_mask);
300 goto out; 300 goto out;
301 } 301 }
302 302
@@ -354,7 +354,7 @@ void tick_nohz_stop_sched_tick(int inidle)
354 * softirq. 354 * softirq.
355 */ 355 */
356 tick_do_update_jiffies64(ktime_get()); 356 tick_do_update_jiffies64(ktime_get());
357 cpu_clear(cpu, nohz_cpu_mask); 357 cpumask_clear_cpu(cpu, nohz_cpu_mask);
358 } 358 }
359 raise_softirq_irqoff(TIMER_SOFTIRQ); 359 raise_softirq_irqoff(TIMER_SOFTIRQ);
360out: 360out:
@@ -432,7 +432,7 @@ void tick_nohz_restart_sched_tick(void)
432 select_nohz_load_balancer(0); 432 select_nohz_load_balancer(0);
433 now = ktime_get(); 433 now = ktime_get();
434 tick_do_update_jiffies64(now); 434 tick_do_update_jiffies64(now);
435 cpu_clear(cpu, nohz_cpu_mask); 435 cpumask_clear_cpu(cpu, nohz_cpu_mask);
436 436
437 /* 437 /*
438 * We stopped the tick in idle. Update process times would miss the 438 * We stopped the tick in idle. Update process times would miss the
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b0f239e443bc..1e3fd3e3436a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -545,6 +545,16 @@ config DEBUG_SG
545 545
546 If unsure, say N. 546 If unsure, say N.
547 547
548config DEBUG_NOTIFIERS
549 bool "Debug notifier call chains"
550 depends on DEBUG_KERNEL
551 help
552 Enable this to turn on sanity checking for notifier call chains.
553 This is most useful for kernel developers to make sure that
554 modules properly unregister themselves from notifier chains.
555 This is a relatively cheap check but if you care about maximum
556 performance, say N.
557
548config FRAME_POINTER 558config FRAME_POINTER
549 bool "Compile the kernel with frame pointers" 559 bool "Compile the kernel with frame pointers"
550 depends on DEBUG_KERNEL && \ 560 depends on DEBUG_KERNEL && \
diff --git a/mm/memory.c b/mm/memory.c
index 164951c47305..fc031d68327e 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3049,3 +3049,18 @@ void print_vma_addr(char *prefix, unsigned long ip)
3049 } 3049 }
3050 up_read(&current->mm->mmap_sem); 3050 up_read(&current->mm->mmap_sem);
3051} 3051}
3052
3053#ifdef CONFIG_PROVE_LOCKING
3054void might_fault(void)
3055{
3056 might_sleep();
3057 /*
3058 * it would be nicer only to annotate paths which are not under
3059 * pagefault_disable, however that requires a larger audit and
3060 * providing helpers like get_user_atomic.
3061 */
3062 if (!in_atomic() && current->mm)
3063 might_lock_read(&current->mm->mmap_sem);
3064}
3065EXPORT_SYMBOL(might_fault);
3066#endif
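
Editor's note: the new might_fault() teaches lockdep that a user-space access may sleep and may take mmap_sem for reading, complaining about the possibility rather than waiting for an actual fault. The idea of asserting on a potential blocking operation can be mimicked in userspace with a debug counter; the might_block() helper and atomic_depth counter below are purely illustrative, not the kernel API:

        #include <assert.h>
        #include <stdio.h>

        /* Debug-only model of "this call might sleep / take a lock". */
        static int atomic_depth;        /* stands in for in_atomic() */

        static void might_block(const char *what)
        {
                /* Complain about the *possibility*, even if nothing blocks now. */
                assert(atomic_depth == 0 && "might_block() called in atomic context");
                (void)what;
        }

        static void copy_like_user_access(char *dst, const char *src, unsigned long n)
        {
                might_block("user copy may fault and sleep");
                while (n--)
                        *dst++ = *src++;
        }

        int main(void)
        {
                char buf[8];

                copy_like_user_access(buf, "ok", 3);    /* fine: not atomic */

                atomic_depth++;                         /* enter an "atomic" section */
                /* copy_like_user_access(buf, "no", 3); would trip the assertion */
                atomic_depth--;

                printf("%s\n", buf);
                return 0;
        }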