author		Thomas Gleixner <tglx@linutronix.de>	2013-04-24 14:33:46 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2013-04-24 14:33:54 -0400
commit		6402c7dc2a19c19bd8cdc7d80878b850da418942 (patch)
tree		cda2ea2df40442e2aa016119f3548cc504127ea8 /kernel
parent		77c675ba18836802f6b73d2d773481d06ebc0f04 (diff)
parent		60d509fa6a9c4653a86ad830e4c4b30360b23f0e (diff)
Merge branch 'linus' into timers/core
Reason: Get upstream fixes before adding conflicting code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/capability.c			|  24
-rw-r--r--	kernel/events/core.c			|  14
-rw-r--r--	kernel/events/internal.h		|   2
-rw-r--r--	kernel/events/ring_buffer.c		|  22
-rw-r--r--	kernel/exit.c				|   2
-rw-r--r--	kernel/fork.c				|   5
-rw-r--r--	kernel/futex.c				|  46
-rw-r--r--	kernel/hrtimer.c			|   3
-rw-r--r--	kernel/kexec.c				| 118
-rw-r--r--	kernel/kprobes.c			|  19
-rw-r--r--	kernel/kthread.c			|  52
-rw-r--r--	kernel/lockdep.c			|  17
-rw-r--r--	kernel/pid_namespace.c			|   3
-rw-r--r--	kernel/printk.c				|  80
-rw-r--r--	kernel/sched/clock.c			|  26
-rw-r--r--	kernel/sched/core.c			|   8
-rw-r--r--	kernel/sched/cputime.c			|   2
-rw-r--r--	kernel/signal.c				|   7
-rw-r--r--	kernel/smpboot.c			|  14
-rw-r--r--	kernel/sys.c				|  60
-rw-r--r--	kernel/time/tick-broadcast.c		|   3
-rw-r--r--	kernel/trace/blktrace.c			|  26
-rw-r--r--	kernel/trace/ftrace.c			|  58
-rw-r--r--	kernel/trace/trace.c			|  68
-rw-r--r--	kernel/trace/trace.h			|   6
-rw-r--r--	kernel/trace/trace_irqsoff.c		|  19
-rw-r--r--	kernel/trace/trace_sched_wakeup.c	|  18
-rw-r--r--	kernel/trace/trace_stack.c		|   2
-rw-r--r--	kernel/user.c				|   2
-rw-r--r--	kernel/user_namespace.c			|  37
-rw-r--r--	kernel/workqueue.c			|  51
31 files changed, 539 insertions(+), 275 deletions(-)
diff --git a/kernel/capability.c b/kernel/capability.c
index 493d97259484..f6c2ce5701e1 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -393,6 +393,30 @@ bool ns_capable(struct user_namespace *ns, int cap)
 EXPORT_SYMBOL(ns_capable);
 
 /**
+ * file_ns_capable - Determine if the file's opener had a capability in effect
+ * @file: The file we want to check
+ * @ns: The usernamespace we want the capability in
+ * @cap: The capability to be tested for
+ *
+ * Return true if task that opened the file had a capability in effect
+ * when the file was opened.
+ *
+ * This does not set PF_SUPERPRIV because the caller may not
+ * actually be privileged.
+ */
+bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap)
+{
+	if (WARN_ON_ONCE(!cap_valid(cap)))
+		return false;
+
+	if (security_capable(file->f_cred, ns, cap) == 0)
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(file_ns_capable);
+
+/**
  * capable - Determine if the current task has a superior capability in effect
  * @cap: The capability to be tested for
  *
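
The new helper lets code that acts on behalf of a file's opener check the opener's privileges instead of current's. As a hypothetical kernel-style sketch (not part of this commit; the ioctl and capability choice are invented for illustration), a driver could gate a privileged operation on the credentials captured at open() time:

/* Hypothetical sketch, for illustration only. */
static long example_ioctl(struct file *file, unsigned int cmd,
			  unsigned long arg)
{
	/*
	 * Check against file->f_cred, the credentials recorded when
	 * the file was opened, rather than against current_cred(), so
	 * a more privileged process handed this descriptor cannot be
	 * used to escalate beyond what the opener was allowed to do.
	 */
	if (!file_ns_capable(file, &init_user_ns, CAP_NET_ADMIN))
		return -EPERM;

	return 0;	/* the privileged operation would go here */
}
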
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b0cd86501c30..4d3124b39277 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4434,12 +4434,15 @@ static void perf_event_task_event(struct perf_task_event *task_event)
 		if (ctxn < 0)
 			goto next;
 		ctx = rcu_dereference(current->perf_event_ctxp[ctxn]);
+		if (ctx)
+			perf_event_task_ctx(ctx, task_event);
 	}
-	if (ctx)
-		perf_event_task_ctx(ctx, task_event);
 next:
 		put_cpu_ptr(pmu->pmu_cpu_context);
 	}
+	if (task_event->task_ctx)
+		perf_event_task_ctx(task_event->task_ctx, task_event);
+
 	rcu_read_unlock();
 }
 
@@ -4734,7 +4737,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	} else {
 		if (arch_vma_name(mmap_event->vma)) {
 			name = strncpy(tmp, arch_vma_name(mmap_event->vma),
-				       sizeof(tmp));
+				       sizeof(tmp) - 1);
+			tmp[sizeof(tmp) - 1] = '\0';
 			goto got_name;
 		}
 
@@ -5327,7 +5331,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
 
 static int perf_swevent_init(struct perf_event *event)
 {
-	int event_id = event->attr.config;
+	u64 event_id = event->attr.config;
 
 	if (event->attr.type != PERF_TYPE_SOFTWARE)
 		return -ENOENT;
@@ -5647,6 +5651,7 @@ static void perf_swevent_init_hrtimer(struct perf_event *event)
 		event->attr.sample_period = NSEC_PER_SEC / freq;
 		hwc->sample_period = event->attr.sample_period;
 		local64_set(&hwc->period_left, hwc->sample_period);
+		hwc->last_period = hwc->sample_period;
 		event->attr.freq = 0;
 	}
 }
@@ -5982,6 +5987,7 @@ skip_type:
 	if (pmu->pmu_cpu_context)
 		goto got_cpu_context;
 
+	ret = -ENOMEM;
 	pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context);
 	if (!pmu->pmu_cpu_context)
 		goto free_dev;
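
The strncpy() hunk above fixes a classic truncation bug: when the source is at least as long as the destination, strncpy() fills the buffer completely and writes no terminating NUL. A small userspace demonstration of both the bug and the fix (buffer size and name invented for illustration):

#include <stdio.h>
#include <string.h>

int main(void)
{
	char tmp[8];
	const char *name = "a_very_long_vma_name";

	/* Buggy pattern: all 8 bytes are copied, none of them is '\0',
	 * so any later read of tmp as a string runs off the end. */
	strncpy(tmp, name, sizeof(tmp));

	/* Fixed pattern from the hunk above: leave room for the
	 * terminator and set it explicitly. */
	strncpy(tmp, name, sizeof(tmp) - 1);
	tmp[sizeof(tmp) - 1] = '\0';
	printf("%s\n", tmp);		/* prints "a_very_" */

	return 0;
}
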
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index d56a64c99a8b..eb675c4d59df 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -16,7 +16,7 @@ struct ring_buffer {
 	int			page_order;	/* allocation order */
 #endif
 	int			nr_pages;	/* nr of data pages */
-	int			writable;	/* are we writable */
+	int			overwrite;	/* can overwrite itself */
 
 	atomic_t		poll;		/* POLL_ for wakeups */
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 23cb34ff3973..97fddb09762b 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -18,12 +18,24 @@
 static bool perf_output_space(struct ring_buffer *rb, unsigned long tail,
 			      unsigned long offset, unsigned long head)
 {
-	unsigned long mask;
+	unsigned long sz = perf_data_size(rb);
+	unsigned long mask = sz - 1;
 
-	if (!rb->writable)
+	/*
+	 * check if user-writable
+	 * overwrite : over-write its own tail
+	 * !overwrite: buffer possibly drops events.
+	 */
+	if (rb->overwrite)
 		return true;
 
-	mask = perf_data_size(rb) - 1;
+	/*
+	 * verify that payload is not bigger than buffer
+	 * otherwise masking logic may fail to detect
+	 * the "not enough space" condition
+	 */
+	if ((head - offset) > sz)
+		return false;
 
 	offset = (offset - tail) & mask;
 	head   = (head   - tail) & mask;
@@ -212,7 +224,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 		rb->watermark = max_size / 2;
 
 	if (flags & RING_BUFFER_WRITABLE)
-		rb->writable = 1;
+		rb->overwrite = 0;
+	else
+		rb->overwrite = 1;
 
 	atomic_set(&rb->refcount, 1);
 
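
The new size check matters because perf's data buffer is a power-of-two circular buffer: positions are reduced with `& (size - 1)`, so any distance larger than the buffer silently wraps and can masquerade as a small one. A toy userspace model of the failure the guard prevents (sizes invented for illustration):

#include <stdio.h>

#define SZ   16UL	/* buffer size, must be a power of two */
#define MASK (SZ - 1)

int main(void)
{
	unsigned long offset = 0, head = 20;	/* 20-byte payload */

	/* Masking alone wraps: (20 - 0) & 15 == 4, so an oversized
	 * payload looks like it needs only 4 bytes. */
	printf("masked distance: %lu\n", (head - offset) & MASK);

	/* The guard added in the hunk above rejects it first. */
	if ((head - offset) > SZ)
		printf("payload larger than buffer: reject\n");

	return 0;
}
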
diff --git a/kernel/exit.c b/kernel/exit.c
index 51e485ca9935..60bc027c61c3 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -835,7 +835,7 @@ void do_exit(long code)
 	/*
 	 * Make sure we are holding no locks:
 	 */
-	debug_check_no_locks_held();
+	debug_check_no_locks_held(tsk);
 	/*
 	 * We can do this unlocked here. The futex code uses this flag
 	 * just to verify whether the pi state cleanup has been done
diff --git a/kernel/fork.c b/kernel/fork.c
index 8d932b1c9056..1766d324d5e3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1141,6 +1141,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 		return ERR_PTR(-EINVAL);
 
+	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
+		return ERR_PTR(-EINVAL);
+
 	/*
 	 * Thread groups must share signals as well, and detached threads
 	 * can only be started up within the thread group.
@@ -1807,7 +1810,7 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 	 * If unsharing a user namespace must also unshare the thread.
 	 */
 	if (unshare_flags & CLONE_NEWUSER)
-		unshare_flags |= CLONE_THREAD;
+		unshare_flags |= CLONE_THREAD | CLONE_FS;
 	/*
 	 * If unsharing a pid namespace must also unshare the thread.
 	 */
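
The new CLONE_NEWUSER|CLONE_FS rejection uses the usual both-bits-set idiom: mask with the combined constant and compare against it, which is true only when every bit in the mask is present. A minimal userspace illustration (flag values taken from the Linux uapi headers):

#include <stdio.h>

#define CLONE_FS      0x00000200
#define CLONE_NEWUSER 0x10000000

int main(void)
{
	unsigned long clone_flags = CLONE_NEWUSER | CLONE_FS;

	/* True only when *both* bits are set; a plain '&' against the
	 * combined mask would also fire when just one is present. */
	if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) ==
	    (CLONE_NEWUSER|CLONE_FS))
		printf("invalid combination: would return -EINVAL\n");

	return 0;
}
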
diff --git a/kernel/futex.c b/kernel/futex.c
index f0090a993dab..b26dcfc02c94 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -223,7 +223,8 @@ static void drop_futex_key_refs(union futex_key *key)
  * @rw:		mapping needs to be read/write (values: VERIFY_READ,
  *		VERIFY_WRITE)
  *
- * Returns a negative error code or 0
+ * Return: a negative error code or 0
+ *
  * The key words are stored in *key on success.
  *
  * For shared mappings, it's (page->index, file_inode(vma->vm_file),
@@ -705,9 +706,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
705 * be "current" except in the case of requeue pi. 706 * be "current" except in the case of requeue pi.
706 * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) 707 * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0)
707 * 708 *
708 * Returns: 709 * Return:
709 * 0 - ready to wait 710 * 0 - ready to wait;
710 * 1 - acquired the lock 711 * 1 - acquired the lock;
711 * <0 - error 712 * <0 - error
712 * 713 *
713 * The hb->lock and futex_key refs shall be held by the caller. 714 * The hb->lock and futex_key refs shall be held by the caller.
@@ -1191,9 +1192,9 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
  * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
  * hb1 and hb2 must be held by the caller.
  *
- * Returns:
- *  0 - failed to acquire the lock atomicly
- *  1 - acquired the lock
+ * Return:
+ *  0 - failed to acquire the lock atomically;
+ *  1 - acquired the lock;
  * <0 - error
  */
 static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1199static int futex_proxy_trylock_atomic(u32 __user *pifutex, 1200static int futex_proxy_trylock_atomic(u32 __user *pifutex,
@@ -1254,8 +1255,8 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
  * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
  * uaddr2 atomically on behalf of the top waiter.
  *
- * Returns:
- * >=0 - on success, the number of tasks requeued or woken
+ * Return:
+ * >=0 - on success, the number of tasks requeued or woken;
  *  <0 - on error
  */
 static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
@@ -1536,8 +1537,8 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
  * The q->lock_ptr must not be held by the caller. A call to unqueue_me() must
  * be paired with exactly one earlier call to queue_me().
  *
- * Returns:
- *   1 - if the futex_q was still queued (and we removed unqueued it)
+ * Return:
+ *   1 - if the futex_q was still queued (and we removed unqueued it);
  *   0 - if the futex_q was already removed by the waking thread
  */
 static int unqueue_me(struct futex_q *q)
@@ -1707,9 +1708,9 @@ static long futex_wait_restart(struct restart_block *restart);
  * the pi_state owner as well as handle race conditions that may allow us to
  * acquire the lock. Must be called with the hb lock held.
  *
- * Returns:
- *  1 - success, lock taken
- *  0 - success, lock not taken
+ * Return:
+ *  1 - success, lock taken;
+ *  0 - success, lock not taken;
  * <0 - on error (-EFAULT)
  */
 static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
@@ -1824,8 +1825,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
  * Return with the hb lock held and a q.key reference on success, and unlocked
  * with no q.key reference on failure.
  *
- * Returns:
- *  0 - uaddr contains val and hb has been locked
+ * Return:
+ *  0 - uaddr contains val and hb has been locked;
  * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
  */
 static int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
@@ -2203,9 +2204,9 @@ pi_faulted:
  * the wakeup and return the appropriate error code to the caller. Must be
  * called with the hb lock held.
  *
- * Returns
- *  0 - no early wakeup detected
- * <0 - -ETIMEDOUT or -ERESTARTNOINTR
+ * Return:
+ *  0 = no early wakeup detected;
+ * <0 = -ETIMEDOUT or -ERESTARTNOINTR
  */
 static inline
 int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
@@ -2247,7 +2248,6 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  * @val:	the expected value of uaddr
  * @abs_time:	absolute timeout
  * @bitset:	32 bit wakeup bitset set by userspace, defaults to all
- * @clockrt:	whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
  * @uaddr2:	the pi futex we will take prior to returning to user-space
  *
  * The caller will wait on uaddr and will be requeued by futex_requeue() to
@@ -2258,7 +2258,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  * there was a need to.
  *
  * We call schedule in futex_wait_queue_me() when we enqueue and return there
- * via the following:
+ * via the following--
  * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
  * 2) wakeup on uaddr2 after a requeue
  * 3) signal
@@ -2276,8 +2276,8 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
  *
  * If 4 or 7, we cleanup and return with -ETIMEDOUT.
  *
- * Returns:
- *  0 - On success
+ * Return:
+ *  0 - On success;
  * <0 - On error
  */
 static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
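
All of the futex hunks above are kernel-doc cleanups: the documentation tooling treats a structured "Return:" line as a dedicated section, whereas the older freeform "Returns:" text was rendered as ordinary description. A minimal sketch of the expected comment shape (function and values invented for illustration):

/**
 * example_trylock - try to take the example lock
 * @lock: the lock to be taken
 *
 * Return:
 *  1 - acquired the lock;
 *  0 - lock already held by another task;
 * <0 - error
 */
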
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c0875ae0de17..609d8ff38b74 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -63,6 +63,7 @@
 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 {
 
+	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
 	.clock_base =
 	{
 		{
@@ -1662,8 +1663,6 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
 	int i;
 
-	raw_spin_lock_init(&cpu_base->lock);
-
 	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
 		cpu_base->clock_base[i].cpu_base = cpu_base;
 		timerqueue_init_head(&cpu_base->clock_base[i].active);
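
Moving the lock into the static initializer closes a window where the per-CPU base could be touched before init_hrtimers_cpu() ran: a statically initialized lock is valid from the moment the kernel starts. The same pattern exists in userspace with pthreads; a loose analogue (names invented, compile with -pthread):

#include <pthread.h>
#include <stdio.h>

/* Statically initialized lock: usable before any setup code runs,
 * so there is no init call that consumers could race against. */
static struct {
	pthread_mutex_t lock;
	int counter;
} base = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
};

int main(void)
{
	pthread_mutex_lock(&base.lock);
	base.counter++;
	pthread_mutex_unlock(&base.lock);
	printf("%d\n", base.counter);
	return 0;
}
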
diff --git a/kernel/kexec.c b/kernel/kexec.c
index bddd3d7a74b6..ffd4e111fd67 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -55,7 +55,7 @@ struct resource crashk_res = {
 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
 };
 struct resource crashk_low_res = {
-	.name  = "Crash kernel low",
+	.name  = "Crash kernel",
 	.start = 0,
 	.end   = 0,
 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
@@ -1368,35 +1368,114 @@ static int __init parse_crashkernel_simple(char *cmdline,
 	return 0;
 }
 
+#define SUFFIX_HIGH 0
+#define SUFFIX_LOW  1
+#define SUFFIX_NULL 2
+static __initdata char *suffix_tbl[] = {
+	[SUFFIX_HIGH] = ",high",
+	[SUFFIX_LOW]  = ",low",
+	[SUFFIX_NULL] = NULL,
+};
+
 /*
- * That function is the entry point for command line parsing and should be
- * called from the arch-specific code.
+ * That function parses "suffix" crashkernel command lines like
+ *
+ *	crashkernel=size,[high|low]
+ *
+ * It returns 0 on success and -EINVAL on failure.
  */
+static int __init parse_crashkernel_suffix(char *cmdline,
+					   unsigned long long *crash_size,
+					   unsigned long long *crash_base,
+					   const char *suffix)
+{
+	char *cur = cmdline;
+
+	*crash_size = memparse(cmdline, &cur);
+	if (cmdline == cur) {
+		pr_warn("crashkernel: memory value expected\n");
+		return -EINVAL;
+	}
+
+	/* check with suffix */
+	if (strncmp(cur, suffix, strlen(suffix))) {
+		pr_warn("crashkernel: unrecognized char\n");
+		return -EINVAL;
+	}
+	cur += strlen(suffix);
+	if (*cur != ' ' && *cur != '\0') {
+		pr_warn("crashkernel: unrecognized char\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static __init char *get_last_crashkernel(char *cmdline,
+					 const char *name,
+					 const char *suffix)
+{
+	char *p = cmdline, *ck_cmdline = NULL;
+
+	/* find crashkernel and use the last one if there are more */
+	p = strstr(p, name);
+	while (p) {
+		char *end_p = strchr(p, ' ');
+		char *q;
+
+		if (!end_p)
+			end_p = p + strlen(p);
+
+		if (!suffix) {
+			int i;
+
+			/* skip the one with any known suffix */
+			for (i = 0; suffix_tbl[i]; i++) {
+				q = end_p - strlen(suffix_tbl[i]);
+				if (!strncmp(q, suffix_tbl[i],
+					     strlen(suffix_tbl[i])))
+					goto next;
+			}
+			ck_cmdline = p;
+		} else {
+			q = end_p - strlen(suffix);
+			if (!strncmp(q, suffix, strlen(suffix)))
+				ck_cmdline = p;
+		}
+next:
+		p = strstr(p+1, name);
+	}
+
+	if (!ck_cmdline)
+		return NULL;
+
+	return ck_cmdline;
+}
+
 static int __init __parse_crashkernel(char *cmdline,
 			     unsigned long long system_ram,
 			     unsigned long long *crash_size,
 			     unsigned long long *crash_base,
-			     const char *name)
+			     const char *name,
+			     const char *suffix)
 {
-	char	*p = cmdline, *ck_cmdline = NULL;
 	char	*first_colon, *first_space;
+	char	*ck_cmdline;
 
 	BUG_ON(!crash_size || !crash_base);
 	*crash_size = 0;
 	*crash_base = 0;
 
-	/* find crashkernel and use the last one if there are more */
-	p = strstr(p, name);
-	while (p) {
-		ck_cmdline = p;
-		p = strstr(p+1, name);
-	}
+	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
 
 	if (!ck_cmdline)
 		return -EINVAL;
 
 	ck_cmdline += strlen(name);
 
+	if (suffix)
+		return parse_crashkernel_suffix(ck_cmdline, crash_size,
+				crash_base, suffix);
 	/*
 	 * if the commandline contains a ':', then that's the extended
 	 * syntax -- if not, it must be the classic syntax
@@ -1413,13 +1492,26 @@ static int __init __parse_crashkernel(char *cmdline,
 	return 0;
 }
 
+/*
+ * That function is the entry point for command line parsing and should be
+ * called from the arch-specific code.
+ */
 int __init parse_crashkernel(char *cmdline,
 			     unsigned long long system_ram,
 			     unsigned long long *crash_size,
 			     unsigned long long *crash_base)
 {
 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
-					"crashkernel=");
+					"crashkernel=", NULL);
+}
+
+int __init parse_crashkernel_high(char *cmdline,
+			     unsigned long long system_ram,
+			     unsigned long long *crash_size,
+			     unsigned long long *crash_base)
+{
+	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
+				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
 }
 
 int __init parse_crashkernel_low(char *cmdline,
@@ -1428,7 +1520,7 @@ int __init parse_crashkernel_low(char *cmdline,
 			     unsigned long long *crash_base)
 {
 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
-					"crashkernel_low=");
+				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
 }
 
 static void update_vmcoreinfo_note(void)
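
parse_crashkernel_suffix() leans on memparse(), which consumes a number plus an optional K/M/G multiplier and reports how far it got. A rough userspace model of the same flow, with memparse() approximated by strtoull() plus a suffix switch (helper name invented; uppercase multipliers only for brevity):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int parse_size_suffix(const char *cmdline, const char *suffix,
			     unsigned long long *size)
{
	char *cur;

	*size = strtoull(cmdline, &cur, 0);
	if (cur == cmdline)
		return -1;		/* memory value expected */

	switch (*cur) {		/* crude stand-in for memparse() */
	case 'G': *size <<= 10;	/* fall through */
	case 'M': *size <<= 10;	/* fall through */
	case 'K': *size <<= 10; cur++;
	}

	/* the rest must be exactly the expected ",high"/",low" suffix */
	if (strncmp(cur, suffix, strlen(suffix)))
		return -1;		/* unrecognized char */
	cur += strlen(suffix);
	if (*cur != ' ' && *cur != '\0')
		return -1;

	return 0;
}

int main(void)
{
	unsigned long long size;

	if (!parse_size_suffix("256M,high", ",high", &size))
		printf("crash size: %llu bytes\n", size);	/* 268435456 */
	return 0;
}
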
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e35be53f6613..3fed7f0cbcdf 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -794,16 +794,16 @@ out:
 }
 
 #ifdef CONFIG_SYSCTL
-/* This should be called with kprobe_mutex locked */
 static void __kprobes optimize_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
 	unsigned int i;
 
+	mutex_lock(&kprobe_mutex);
 	/* If optimization is already allowed, just return */
 	if (kprobes_allow_optimization)
-		return;
+		goto out;
 
 	kprobes_allow_optimization = true;
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
@@ -813,18 +813,22 @@ static void __kprobes optimize_all_kprobes(void)
 			optimize_kprobe(p);
 	}
 	printk(KERN_INFO "Kprobes globally optimized\n");
+out:
+	mutex_unlock(&kprobe_mutex);
 }
 
-/* This should be called with kprobe_mutex locked */
 static void __kprobes unoptimize_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct kprobe *p;
 	unsigned int i;
 
+	mutex_lock(&kprobe_mutex);
 	/* If optimization is already prohibited, just return */
-	if (!kprobes_allow_optimization)
+	if (!kprobes_allow_optimization) {
+		mutex_unlock(&kprobe_mutex);
 		return;
+	}
 
 	kprobes_allow_optimization = false;
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
@@ -834,11 +838,14 @@ static void __kprobes unoptimize_all_kprobes(void)
 				unoptimize_kprobe(p, false);
 		}
 	}
+	mutex_unlock(&kprobe_mutex);
+
 	/* Wait for unoptimizing completion */
 	wait_for_kprobe_optimizer();
 	printk(KERN_INFO "Kprobes globally unoptimized\n");
 }
 
+static DEFINE_MUTEX(kprobe_sysctl_mutex);
 int sysctl_kprobes_optimization;
 int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 				      void __user *buffer, size_t *length,
@@ -846,7 +853,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 {
 	int ret;
 
-	mutex_lock(&kprobe_mutex);
+	mutex_lock(&kprobe_sysctl_mutex);
 	sysctl_kprobes_optimization = kprobes_allow_optimization ? 1 : 0;
 	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
 
@@ -854,7 +861,7 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
 		optimize_all_kprobes();
 	else
 		unoptimize_all_kprobes();
-	mutex_unlock(&kprobe_mutex);
+	mutex_unlock(&kprobe_sysctl_mutex);
 
 	return ret;
 }
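
The kprobes change moves locking inside the helpers, and optimize_all_kprobes() now uses the kernel's standard single-exit pattern: every path after mutex_lock() leaves through one labeled unlock site. A compact userspace rendering of the same shape (names invented, compile with -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static int allowed;

/* Same shape as optimize_all_kprobes() above: take the lock inside
 * the function and funnel every early exit through one unlock site. */
static void enable_all(void)
{
	pthread_mutex_lock(&m);
	if (allowed)		/* already enabled, nothing to do */
		goto out;

	allowed = 1;
	printf("globally enabled\n");
out:
	pthread_mutex_unlock(&m);
}

int main(void)
{
	enable_all();
	enable_all();	/* second call takes the early-out path */
	return 0;
}
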
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 691dc2ef9baf..9eb7fed0bbaa 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -124,12 +124,12 @@ void *kthread_data(struct task_struct *task)
 
 static void __kthread_parkme(struct kthread *self)
 {
-	__set_current_state(TASK_INTERRUPTIBLE);
+	__set_current_state(TASK_PARKED);
 	while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) {
 		if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags))
 			complete(&self->parked);
 		schedule();
-		__set_current_state(TASK_INTERRUPTIBLE);
+		__set_current_state(TASK_PARKED);
 	}
 	clear_bit(KTHREAD_IS_PARKED, &self->flags);
 	__set_current_state(TASK_RUNNING);
@@ -256,8 +256,13 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
 }
 EXPORT_SYMBOL(kthread_create_on_node);
 
-static void __kthread_bind(struct task_struct *p, unsigned int cpu)
+static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
 {
+	/* Must have done schedule() in kthread() before we set_task_cpu */
+	if (!wait_task_inactive(p, state)) {
+		WARN_ON(1);
+		return;
+	}
 	/* It's safe because the task is inactive. */
 	do_set_cpus_allowed(p, cpumask_of(cpu));
 	p->flags |= PF_THREAD_BOUND;
@@ -274,12 +279,7 @@ static void __kthread_bind(struct task_struct *p, unsigned int cpu)
  */
 void kthread_bind(struct task_struct *p, unsigned int cpu)
 {
-	/* Must have done schedule() in kthread() before we set_task_cpu */
-	if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
-		WARN_ON(1);
-		return;
-	}
-	__kthread_bind(p, cpu);
+	__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
 }
 EXPORT_SYMBOL(kthread_bind);
 
@@ -324,6 +324,22 @@ static struct kthread *task_get_live_kthread(struct task_struct *k)
 	return NULL;
 }
 
+static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
+{
+	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+	/*
+	 * We clear the IS_PARKED bit here as we don't wait
+	 * until the task has left the park code. So if we'd
+	 * park before that happens we'd see the IS_PARKED bit
+	 * which might be about to be cleared.
+	 */
+	if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
+		if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
+			__kthread_bind(k, kthread->cpu, TASK_PARKED);
+		wake_up_state(k, TASK_PARKED);
+	}
+}
+
 /**
  * kthread_unpark - unpark a thread created by kthread_create().
  * @k:		thread created by kthread_create().
@@ -336,20 +352,8 @@ void kthread_unpark(struct task_struct *k)
 {
 	struct kthread *kthread = task_get_live_kthread(k);
 
-	if (kthread) {
-		clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
-		/*
-		 * We clear the IS_PARKED bit here as we don't wait
-		 * until the task has left the park code. So if we'd
-		 * park before that happens we'd see the IS_PARKED bit
-		 * which might be about to be cleared.
-		 */
-		if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
-			if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
-				__kthread_bind(k, kthread->cpu);
-			wake_up_process(k);
-		}
-	}
+	if (kthread)
+		__kthread_unpark(k, kthread);
 	put_task_struct(k);
 }
 
@@ -407,7 +411,7 @@ int kthread_stop(struct task_struct *k)
 	trace_sched_kthread_stop(k);
 	if (kthread) {
 		set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
-		clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
+		__kthread_unpark(k, kthread);
 		wake_up_process(k);
 		wait_for_completion(&kthread->exited);
 	}
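
The park/unpark handshake above is easier to see in a loose userspace analogue: the worker marks itself parked exactly once, signals the parker, and sleeps until told otherwise. The kernel version needs no mutex/condvar because it introduces a dedicated TASK_PARKED sleep state, letting wait_task_inactive() and wake_up_state() target parked tasks precisely. A sketch (all names invented, compile with -pthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv = PTHREAD_COND_INITIALIZER;
static bool should_park = true, is_parked;

static void *worker(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&m);
	while (should_park) {
		if (!is_parked) {
			is_parked = true;
			pthread_cond_broadcast(&cv);	/* like complete(&parked) */
		}
		pthread_cond_wait(&cv, &m);		/* like schedule() */
	}
	is_parked = false;
	pthread_mutex_unlock(&m);
	printf("worker running\n");
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);

	pthread_mutex_lock(&m);
	while (!is_parked)			/* wait until parked */
		pthread_cond_wait(&cv, &m);
	should_park = false;			/* unpark */
	pthread_cond_broadcast(&cv);
	pthread_mutex_unlock(&m);

	pthread_join(t, NULL);
	return 0;
}
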
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 259db207b5d9..8a0efac4f99d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -4088,7 +4088,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
 }
 EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
 
-static void print_held_locks_bug(void)
+static void print_held_locks_bug(struct task_struct *curr)
 {
 	if (!debug_locks_off())
 		return;
@@ -4097,21 +4097,22 @@ static void print_held_locks_bug(void)
 
 	printk("\n");
 	printk("=====================================\n");
-	printk("[ BUG: %s/%d still has locks held! ]\n",
-	       current->comm, task_pid_nr(current));
+	printk("[ BUG: lock held at task exit time! ]\n");
 	print_kernel_ident();
 	printk("-------------------------------------\n");
-	lockdep_print_held_locks(current);
+	printk("%s/%d is exiting with locks still held!\n",
+	       curr->comm, task_pid_nr(curr));
+	lockdep_print_held_locks(curr);
+
 	printk("\nstack backtrace:\n");
 	dump_stack();
 }
 
-void debug_check_no_locks_held(void)
+void debug_check_no_locks_held(struct task_struct *task)
 {
-	if (unlikely(current->lockdep_depth > 0))
-		print_held_locks_bug();
+	if (unlikely(task->lockdep_depth > 0))
+		print_held_locks_bug(task);
 }
-EXPORT_SYMBOL_GPL(debug_check_no_locks_held);
 
 void debug_show_all_locks(void)
 {
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index c1c3dc1c6023..bea15bdf82b0 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -181,6 +181,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 	int nr;
 	int rc;
 	struct task_struct *task, *me = current;
+	int init_pids = thread_group_leader(me) ? 1 : 2;
 
 	/* Don't allow any more processes into the pid namespace */
 	disable_pid_allocation(pid_ns);
@@ -230,7 +231,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 	 */
 	for (;;) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (pid_ns->nr_hashed == 1)
+		if (pid_ns->nr_hashed == init_pids)
 			break;
 		schedule();
 	}
diff --git a/kernel/printk.c b/kernel/printk.c
index 0b31715f335a..abbdd9e2ac82 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -63,8 +63,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
 #define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
 #define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
 
-DECLARE_WAIT_QUEUE_HEAD(log_wait);
-
 int console_printk[4] = {
 	DEFAULT_CONSOLE_LOGLEVEL,	/* console_loglevel */
 	DEFAULT_MESSAGE_LOGLEVEL,	/* default_message_loglevel */
@@ -224,6 +222,7 @@ struct log {
 static DEFINE_RAW_SPINLOCK(logbuf_lock);
 
 #ifdef CONFIG_PRINTK
+DECLARE_WAIT_QUEUE_HEAD(log_wait);
 /* the next printk record to read by syslog(READ) or /proc/kmsg */
 static u64 syslog_seq;
 static u32 syslog_idx;
@@ -1957,45 +1956,6 @@ int is_console_locked(void)
 	return console_locked;
 }
 
-/*
- * Delayed printk version, for scheduler-internal messages:
- */
-#define PRINTK_BUF_SIZE		512
-
-#define PRINTK_PENDING_WAKEUP	0x01
-#define PRINTK_PENDING_SCHED	0x02
-
-static DEFINE_PER_CPU(int, printk_pending);
-static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
-
-static void wake_up_klogd_work_func(struct irq_work *irq_work)
-{
-	int pending = __this_cpu_xchg(printk_pending, 0);
-
-	if (pending & PRINTK_PENDING_SCHED) {
-		char *buf = __get_cpu_var(printk_sched_buf);
-		printk(KERN_WARNING "[sched_delayed] %s", buf);
-	}
-
-	if (pending & PRINTK_PENDING_WAKEUP)
-		wake_up_interruptible(&log_wait);
-}
-
-static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
-	.func = wake_up_klogd_work_func,
-	.flags = IRQ_WORK_LAZY,
-};
-
-void wake_up_klogd(void)
-{
-	preempt_disable();
-	if (waitqueue_active(&log_wait)) {
-		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
-		irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
-	}
-	preempt_enable();
-}
-
 static void console_cont_flush(char *text, size_t size)
 {
 	unsigned long flags;
@@ -2458,6 +2418,44 @@ static int __init printk_late_init(void)
 late_initcall(printk_late_init);
 
 #if defined CONFIG_PRINTK
+/*
+ * Delayed printk version, for scheduler-internal messages:
+ */
+#define PRINTK_BUF_SIZE		512
+
+#define PRINTK_PENDING_WAKEUP	0x01
+#define PRINTK_PENDING_SCHED	0x02
+
+static DEFINE_PER_CPU(int, printk_pending);
+static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
+
+static void wake_up_klogd_work_func(struct irq_work *irq_work)
+{
+	int pending = __this_cpu_xchg(printk_pending, 0);
+
+	if (pending & PRINTK_PENDING_SCHED) {
+		char *buf = __get_cpu_var(printk_sched_buf);
+		printk(KERN_WARNING "[sched_delayed] %s", buf);
+	}
+
+	if (pending & PRINTK_PENDING_WAKEUP)
+		wake_up_interruptible(&log_wait);
+}
+
+static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
+	.func = wake_up_klogd_work_func,
+	.flags = IRQ_WORK_LAZY,
+};
+
+void wake_up_klogd(void)
+{
+	preempt_disable();
+	if (waitqueue_active(&log_wait)) {
+		this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+		irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
+	}
+	preempt_enable();
+}
 
 int printk_sched(const char *fmt, ...)
 {
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index c685e31492df..c3ae1446461c 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -176,10 +176,36 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
 	u64 this_clock, remote_clock;
 	u64 *ptr, old_val, val;
 
+#if BITS_PER_LONG != 64
+again:
+	/*
+	 * Careful here: The local and the remote clock values need to
+	 * be read out atomic as we need to compare the values and
+	 * then update either the local or the remote side. So the
+	 * cmpxchg64 below only protects one readout.
+	 *
+	 * We must reread via sched_clock_local() in the retry case on
+	 * 32bit as an NMI could use sched_clock_local() via the
+	 * tracer and hit between the readout of
+	 * the low32bit and the high 32bit portion.
+	 */
+	this_clock = sched_clock_local(my_scd);
+	/*
+	 * We must enforce atomic readout on 32bit, otherwise the
+	 * update on the remote cpu can hit inbetween the readout of
+	 * the low32bit and the high 32bit portion.
+	 */
+	remote_clock = cmpxchg64(&scd->clock, 0, 0);
+#else
+	/*
+	 * On 64bit the read of [my]scd->clock is atomic versus the
+	 * update, so we can avoid the above 32bit dance.
+	 */
 	sched_clock_local(my_scd);
 again:
 	this_clock = my_scd->clock;
 	remote_clock = scd->clock;
+#endif
 
 	/*
 	 * Use the opportunity that we have both locks
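
`cmpxchg64(&scd->clock, 0, 0)` is the standard trick for an atomic 64-bit read on 32-bit hardware: the compare-and-swap either stores 0 over an existing 0 (a no-op) or fails and reports the current value, and in both cases the full 64 bits were accessed in one atomic operation. A userspace equivalent with C11 atomics (may need -latomic on 32-bit targets):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t clock_ns = 123456789ULL;

/* Atomic 64-bit read via compare-and-swap: whether the CAS succeeds
 * or fails, the value it observed is left in 'expected'. */
static uint64_t atomic_read64(_Atomic uint64_t *p)
{
	uint64_t expected = 0;

	atomic_compare_exchange_strong(p, &expected, 0);
	return expected;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)atomic_read64(&clock_ns));
	return 0;
}
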
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7f12624a393c..67d04651f44b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1498,8 +1498,10 @@ static void try_to_wake_up_local(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
 
-	BUG_ON(rq != this_rq());
-	BUG_ON(p == current);
+	if (WARN_ON_ONCE(rq != this_rq()) ||
+	    WARN_ON_ONCE(p == current))
+		return;
+
 	lockdep_assert_held(&rq->lock);
 
 	if (!raw_spin_trylock(&p->pi_lock)) {
@@ -4999,7 +5001,7 @@ static void sd_free_ctl_entry(struct ctl_table **tablep)
 }
 
 static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX;
+static int max_load_idx = CPU_LOAD_IDX_MAX-1;
 
 static void
 set_table_entry(struct ctl_table *entry,
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index ed12cbb135f4..e93cca92f38b 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -310,7 +310,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 
 	t = tsk;
 	do {
-		task_cputime(tsk, &utime, &stime);
+		task_cputime(t, &utime, &stime);
 		times->utime += utime;
 		times->stime += stime;
 		times->sum_exec_runtime += task_sched_runtime(t);
diff --git a/kernel/signal.c b/kernel/signal.c
index 2ec870a4c3c4..598dc06be421 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -485,6 +485,9 @@ flush_signal_handlers(struct task_struct *t, int force_default)
 		if (force_default || ka->sa.sa_handler != SIG_IGN)
 			ka->sa.sa_handler = SIG_DFL;
 		ka->sa.sa_flags = 0;
+#ifdef __ARCH_HAS_SA_RESTORER
+		ka->sa.sa_restorer = NULL;
+#endif
 		sigemptyset(&ka->sa.sa_mask);
 		ka++;
 	}
@@ -2682,7 +2685,7 @@ static int do_sigpending(void *set, unsigned long sigsetsize)
 /**
  *  sys_rt_sigpending - examine a pending signal that has been raised
  *			while blocked
- * @set: stores pending signals
+ * @uset: stores pending signals
  * @sigsetsize: size of sigset_t type or larger
  */
 SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, uset, size_t, sigsetsize)
@@ -2945,7 +2948,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 
 static int do_tkill(pid_t tgid, pid_t pid, int sig)
 {
-	struct siginfo info;
+	struct siginfo info = {};
 
 	info.si_signo = sig;
 	info.si_errno = 0;
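
`struct siginfo info = {};` zeroes every byte, including members and padding the subsequent assignments never touch; without it, copying the whole struct back to userspace would ship uninitialized kernel stack bytes. A userspace illustration of the difference (struct layout invented; `{}` is a GNU extension the kernel relies on, `{0}` is the portable spelling):

#include <stdio.h>

struct siginfo_like {
	int  si_signo;
	int  si_errno;
	int  si_code;
	char rest[32];		/* fields this caller never assigns */
};

int main(void)
{
	/* Zero-initialized: 'rest' is all zeroes, nothing can leak. */
	struct siginfo_like info = {0};

	info.si_signo = 9;
	info.si_errno = 0;
	info.si_code  = 0;

	printf("rest[0] == %d\n", info.rest[0]);
	return 0;
}
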
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 8eaed9aa9cf0..02fc5c933673 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -185,8 +185,18 @@ __smpboot_create_thread(struct smp_hotplug_thread *ht, unsigned int cpu)
 	}
 	get_task_struct(tsk);
 	*per_cpu_ptr(ht->store, cpu) = tsk;
-	if (ht->create)
-		ht->create(cpu);
+	if (ht->create) {
+		/*
+		 * Make sure that the task has actually scheduled out
+		 * into park position, before calling the create
+		 * callback. At least the migration thread callback
+		 * requires that the task is off the runqueue.
+		 */
+		if (!wait_task_inactive(tsk, TASK_PARKED))
+			WARN_ON(1);
+		else
+			ht->create(cpu);
+	}
 	return 0;
 }
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 81f56445fba9..0da73cf73e60 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -324,7 +324,6 @@ void kernel_restart_prepare(char *cmd)
 	system_state = SYSTEM_RESTART;
 	usermodehelper_disable();
 	device_shutdown();
-	syscore_shutdown();
 }
 
 /**
@@ -370,6 +369,7 @@ void kernel_restart(char *cmd)
 {
 	kernel_restart_prepare(cmd);
 	disable_nonboot_cpus();
+	syscore_shutdown();
 	if (!cmd)
 		printk(KERN_EMERG "Restarting system.\n");
 	else
@@ -395,6 +395,7 @@ static void kernel_shutdown_prepare(enum system_states state)
 void kernel_halt(void)
 {
 	kernel_shutdown_prepare(SYSTEM_HALT);
+	disable_nonboot_cpus();
 	syscore_shutdown();
 	printk(KERN_EMERG "System halted.\n");
 	kmsg_dump(KMSG_DUMP_HALT);
@@ -2185,9 +2186,8 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
 
 char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
 
-static int __orderly_poweroff(void)
+static int __orderly_poweroff(bool force)
 {
-	int argc;
 	char **argv;
 	static char *envp[] = {
 		"HOME=/",
@@ -2196,20 +2196,40 @@ static int __orderly_poweroff(void)
 	};
 	int ret;
 
-	argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
-	if (argv == NULL) {
+	argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL);
+	if (argv) {
+		ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+		argv_free(argv);
+	} else {
 		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
 		       __func__, poweroff_cmd);
-		return -ENOMEM;
+		ret = -ENOMEM;
 	}
 
-	ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC,
-				      NULL, NULL, NULL);
-	argv_free(argv);
+	if (ret && force) {
+		printk(KERN_WARNING "Failed to start orderly shutdown: "
+		       "forcing the issue\n");
+		/*
+		 * I guess this should try to kick off some daemon to sync and
+		 * poweroff asap. Or not even bother syncing if we're doing an
+		 * emergency shutdown?
+		 */
+		emergency_sync();
+		kernel_power_off();
+	}
 
 	return ret;
 }
 
+static bool poweroff_force;
+
+static void poweroff_work_func(struct work_struct *work)
+{
+	__orderly_poweroff(poweroff_force);
+}
+
+static DECLARE_WORK(poweroff_work, poweroff_work_func);
+
 /**
  * orderly_poweroff - Trigger an orderly system poweroff
  * @force: force poweroff if command execution fails
@@ -2219,21 +2239,9 @@ static int __orderly_poweroff(void)
  */
 int orderly_poweroff(bool force)
 {
-	int ret = __orderly_poweroff();
-
-	if (ret && force) {
-		printk(KERN_WARNING "Failed to start orderly shutdown: "
-		       "forcing the issue\n");
-
-		/*
-		 * I guess this should try to kick off some daemon to sync and
-		 * poweroff asap. Or not even bother syncing if we're doing an
-		 * emergency shutdown?
-		 */
-		emergency_sync();
-		kernel_power_off();
-	}
-
-	return ret;
+	if (force) /* do not override the pending "true" */
+		poweroff_force = true;
+	schedule_work(&poweroff_work);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(orderly_poweroff);
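
The rework defers the actual poweroff to a workqueue, so orderly_poweroff() itself becomes safe to call from contexts that must not sleep; the work item then runs call_usermodehelper() from a kworker in process context. The static-work pattern in isolation, as a hedged kernel-style sketch (names invented, not from this commit):

/* Illustrative kernel-style sketch, not from this commit. */
static void my_deferred_func(struct work_struct *work)
{
	/* Runs later in a kworker thread: may sleep, may allocate
	 * with GFP_KERNEL, may launch usermode helpers. */
}

static DECLARE_WORK(my_work, my_deferred_func);

void my_trigger(void)
{
	/* Callable from atomic context; queueing an already-pending
	 * work item is a no-op, which mirrors the "do not override
	 * the pending true" comment above. */
	schedule_work(&my_work);
}
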
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index f8d2109ef0a2..6e23fde83dbe 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -66,7 +66,8 @@ static void tick_broadcast_start_periodic(struct clock_event_device *bc)
  */
 int tick_check_broadcast_device(struct clock_event_device *dev)
 {
-	if ((tick_broadcast_device.evtdev &&
+	if ((dev->features & CLOCK_EVT_FEAT_DUMMY) ||
+	    (tick_broadcast_device.evtdev &&
 	     tick_broadcast_device.evtdev->rating >= dev->rating) ||
 	     (dev->features & CLOCK_EVT_FEAT_C3STOP))
 		return 0;
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 9e5b8c272eec..5a0f781cd729 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -739,12 +739,6 @@ static void blk_add_trace_rq_complete(void *ignore,
 				      struct request_queue *q,
 				      struct request *rq)
 {
-	struct blk_trace *bt = q->blk_trace;
-
-	/* if control ever passes through here, it's a request based driver */
-	if (unlikely(bt && !bt->rq_based))
-		bt->rq_based = true;
-
 	blk_add_trace_rq(q, rq, BLK_TA_COMPLETE);
 }
 
@@ -780,24 +774,10 @@ static void blk_add_trace_bio_bounce(void *ignore,
 	blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
 }
 
-static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error)
+static void blk_add_trace_bio_complete(void *ignore,
+				       struct request_queue *q, struct bio *bio,
+				       int error)
 {
-	struct request_queue *q;
-	struct blk_trace *bt;
-
-	if (!bio->bi_bdev)
-		return;
-
-	q = bdev_get_queue(bio->bi_bdev);
-	bt = q->blk_trace;
-
-	/*
-	 * Request based drivers will generate both rq and bio completions.
-	 * Ignore bio ones.
-	 */
-	if (likely(!bt) || bt->rq_based)
-		return;
-
 	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
 }
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ab25b88aae56..b3fde6d7b7fc 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -66,7 +66,7 @@
 
 static struct ftrace_ops ftrace_list_end __read_mostly = {
 	.func		= ftrace_stub,
-	.flags		= FTRACE_OPS_FL_RECURSION_SAFE,
+	.flags		= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB,
 };
 
 /* ftrace_enabled is a method to turn ftrace on or off */
@@ -694,7 +694,6 @@ int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
 		free_page(tmp);
 	}
 
-	free_page((unsigned long)stat->pages);
 	stat->pages = NULL;
 	stat->start = NULL;
 
@@ -1053,6 +1052,19 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 
 static struct pid * const ftrace_swapper_pid = &init_struct_pid;
 
+loff_t
+ftrace_filter_lseek(struct file *file, loff_t offset, int whence)
+{
+	loff_t ret;
+
+	if (file->f_mode & FMODE_READ)
+		ret = seq_lseek(file, offset, whence);
+	else
+		file->f_pos = ret = 1;
+
+	return ret;
+}
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 #ifndef CONFIG_FTRACE_MCOUNT_RECORD
@@ -2613,7 +2625,7 @@ static void ftrace_filter_reset(struct ftrace_hash *hash)
  * routine, you can use ftrace_filter_write() for the write
  * routine if @flag has FTRACE_ITER_FILTER set, or
  * ftrace_notrace_write() if @flag has FTRACE_ITER_NOTRACE set.
- * ftrace_regex_lseek() should be used as the lseek routine, and
+ * ftrace_filter_lseek() should be used as the lseek routine, and
  * release must call ftrace_regex_release().
  */
 int
@@ -2697,19 +2709,6 @@ ftrace_notrace_open(struct inode *inode, struct file *file)
 					    inode, file);
 }
 
-loff_t
-ftrace_regex_lseek(struct file *file, loff_t offset, int whence)
-{
-	loff_t ret;
-
-	if (file->f_mode & FMODE_READ)
-		ret = seq_lseek(file, offset, whence);
-	else
-		file->f_pos = ret = 1;
-
-	return ret;
-}
-
 static int ftrace_match(char *str, char *regex, int len, int type)
 {
 	int matched = 0;
@@ -3104,8 +3103,8 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 				continue;
 			}
 
-			hlist_del(&entry->node);
-			call_rcu(&entry->rcu, ftrace_free_entry_rcu);
+			hlist_del_rcu(&entry->node);
+			call_rcu_sched(&entry->rcu, ftrace_free_entry_rcu);
 		}
 	}
 	__disable_ftrace_function_probe();
@@ -3441,14 +3440,14 @@ static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
 
 static int __init set_ftrace_notrace(char *str)
 {
-	strncpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
+	strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
 	return 1;
 }
 __setup("ftrace_notrace=", set_ftrace_notrace);
 
 static int __init set_ftrace_filter(char *str)
 {
-	strncpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
+	strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
 	return 1;
 }
 __setup("ftrace_filter=", set_ftrace_filter);
@@ -3571,7 +3570,7 @@ static const struct file_operations ftrace_filter_fops = {
 	.open = ftrace_filter_open,
 	.read = seq_read,
 	.write = ftrace_filter_write,
-	.llseek = ftrace_regex_lseek,
+	.llseek = ftrace_filter_lseek,
 	.release = ftrace_regex_release,
 };
 
@@ -3579,7 +3578,7 @@ static const struct file_operations ftrace_notrace_fops = {
 	.open = ftrace_notrace_open,
 	.read = seq_read,
 	.write = ftrace_notrace_write,
-	.llseek = ftrace_regex_lseek,
+	.llseek = ftrace_filter_lseek,
 	.release = ftrace_regex_release,
 };
 
@@ -3784,8 +3783,8 @@ static const struct file_operations ftrace_graph_fops = {
3784 .open = ftrace_graph_open, 3783 .open = ftrace_graph_open,
3785 .read = seq_read, 3784 .read = seq_read,
3786 .write = ftrace_graph_write, 3785 .write = ftrace_graph_write,
3786 .llseek = ftrace_filter_lseek,
3787 .release = ftrace_graph_release, 3787 .release = ftrace_graph_release,
3788 .llseek = seq_lseek,
3789}; 3788};
3790#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ 3789#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
3791 3790
@@ -4131,7 +4130,8 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
4131 preempt_disable_notrace(); 4130 preempt_disable_notrace();
4132 trace_recursion_set(TRACE_CONTROL_BIT); 4131 trace_recursion_set(TRACE_CONTROL_BIT);
4133 do_for_each_ftrace_op(op, ftrace_control_list) { 4132 do_for_each_ftrace_op(op, ftrace_control_list) {
4134 if (!ftrace_function_local_disabled(op) && 4133 if (!(op->flags & FTRACE_OPS_FL_STUB) &&
4134 !ftrace_function_local_disabled(op) &&
4135 ftrace_ops_test(op, ip)) 4135 ftrace_ops_test(op, ip))
4136 op->func(ip, parent_ip, op, regs); 4136 op->func(ip, parent_ip, op, regs);
4137 } while_for_each_ftrace_op(op); 4137 } while_for_each_ftrace_op(op);
@@ -4439,7 +4439,7 @@ static const struct file_operations ftrace_pid_fops = {
4439 .open = ftrace_pid_open, 4439 .open = ftrace_pid_open,
4440 .write = ftrace_pid_write, 4440 .write = ftrace_pid_write,
4441 .read = seq_read, 4441 .read = seq_read,
4442 .llseek = seq_lseek, 4442 .llseek = ftrace_filter_lseek,
4443 .release = ftrace_pid_release, 4443 .release = ftrace_pid_release,
4444}; 4444};
4445 4445
@@ -4555,12 +4555,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
4555 ftrace_startup_sysctl(); 4555 ftrace_startup_sysctl();
4556 4556
4557 /* we are starting ftrace again */ 4557 /* we are starting ftrace again */
4558 if (ftrace_ops_list != &ftrace_list_end) { 4558 if (ftrace_ops_list != &ftrace_list_end)
4559 if (ftrace_ops_list->next == &ftrace_list_end) 4559 update_ftrace_function();
4560 ftrace_trace_function = ftrace_ops_list->func;
4561 else
4562 ftrace_trace_function = ftrace_ops_list_func;
4563 }
4564 4560
4565 } else { 4561 } else {
4566 /* stopping ftrace calls (just send to ftrace_stub) */ 4562 /* stopping ftrace calls (just send to ftrace_stub) */
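The sysctl hunk above drops an open-coded copy of ftrace's function-selection logic in favour of the existing update_ftrace_function() helper. In simplified form (a sketch of the decision being centralized; the real helper also accounts for the regs-saving variants), the choice is:

/* Simplified sketch: stub when no ops are registered, direct call for
 * a single ops, the list-walking dispatcher otherwise. */
static void pick_trace_function(void)
{
        if (ftrace_ops_list == &ftrace_list_end)
                ftrace_trace_function = ftrace_stub;
        else if (ftrace_ops_list->next == &ftrace_list_end)
                ftrace_trace_function = ftrace_ops_list->func;
        else
                ftrace_trace_function = ftrace_ops_list_func;
}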
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1f835a83cb2c..66338c4f7f4b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -132,7 +132,7 @@ static char *default_bootup_tracer;
132 132
133static int __init set_cmdline_ftrace(char *str) 133static int __init set_cmdline_ftrace(char *str)
134{ 134{
135 strncpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); 135 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
136 default_bootup_tracer = bootup_tracer_buf; 136 default_bootup_tracer = bootup_tracer_buf;
137 /* We are using ftrace early, expand it */ 137 /* We are using ftrace early, expand it */
138 ring_buffer_expanded = 1; 138 ring_buffer_expanded = 1;
@@ -162,7 +162,7 @@ static char *trace_boot_options __initdata;
162 162
163static int __init set_trace_boot_options(char *str) 163static int __init set_trace_boot_options(char *str)
164{ 164{
165 strncpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); 165 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
166 trace_boot_options = trace_boot_options_buf; 166 trace_boot_options = trace_boot_options_buf;
167 return 0; 167 return 0;
168} 168}
@@ -704,7 +704,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
704void 704void
705update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) 705update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
706{ 706{
707 struct ring_buffer *buf = tr->buffer; 707 struct ring_buffer *buf;
708 708
709 if (trace_stop_count) 709 if (trace_stop_count)
710 return; 710 return;
@@ -719,6 +719,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
719 719
720 arch_spin_lock(&ftrace_max_lock); 720 arch_spin_lock(&ftrace_max_lock);
721 721
722 buf = tr->buffer;
722 tr->buffer = max_tr.buffer; 723 tr->buffer = max_tr.buffer;
723 max_tr.buffer = buf; 724 max_tr.buffer = buf;
724 725
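The point of moving the buf assignment is that reading tr->buffer before taking ftrace_max_lock raced with a concurrent swap: two swappers could both read the same old pointer and lose a buffer. Condensed into a sketch, the corrected critical section reads:

/* Sketch of the corrected swap: the shared pointer is read under the
 * same arch spinlock that serializes all swaps. */
arch_spin_lock(&ftrace_max_lock);
buf = tr->buffer;               /* read only after the lock is held */
tr->buffer = max_tr.buffer;
max_tr.buffer = buf;
arch_spin_unlock(&ftrace_max_lock);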
@@ -743,8 +744,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
743 return; 744 return;
744 745
745 WARN_ON_ONCE(!irqs_disabled()); 746 WARN_ON_ONCE(!irqs_disabled());
746 if (WARN_ON_ONCE(!current_trace->allocated_snapshot)) 747 if (!current_trace->allocated_snapshot) {
748 /* Only the nop tracer should hit this when disabling */
749 WARN_ON_ONCE(current_trace != &nop_trace);
747 return; 750 return;
751 }
748 752
749 arch_spin_lock(&ftrace_max_lock); 753 arch_spin_lock(&ftrace_max_lock);
750 754
@@ -2880,11 +2884,25 @@ static int set_tracer_option(struct tracer *trace, char *cmp, int neg)
2880 return -EINVAL; 2884 return -EINVAL;
2881} 2885}
2882 2886
2883static void set_tracer_flags(unsigned int mask, int enabled) 2887/* Some tracers require overwrite to stay enabled */
2888int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
2889{
2890 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
2891 return -1;
2892
2893 return 0;
2894}
2895
2896int set_tracer_flag(unsigned int mask, int enabled)
2884{ 2897{
2885 /* do nothing if flag is already set */ 2898 /* do nothing if flag is already set */
2886 if (!!(trace_flags & mask) == !!enabled) 2899 if (!!(trace_flags & mask) == !!enabled)
2887 return; 2900 return 0;
2901
2902 /* Give the tracer a chance to approve the change */
2903 if (current_trace->flag_changed)
2904 if (current_trace->flag_changed(current_trace, mask, !!enabled))
2905 return -EINVAL;
2888 2906
2889 if (enabled) 2907 if (enabled)
2890 trace_flags |= mask; 2908 trace_flags |= mask;
@@ -2894,18 +2912,24 @@ static void set_tracer_flags(unsigned int mask, int enabled)
2894 if (mask == TRACE_ITER_RECORD_CMD) 2912 if (mask == TRACE_ITER_RECORD_CMD)
2895 trace_event_enable_cmd_record(enabled); 2913 trace_event_enable_cmd_record(enabled);
2896 2914
2897 if (mask == TRACE_ITER_OVERWRITE) 2915 if (mask == TRACE_ITER_OVERWRITE) {
2898 ring_buffer_change_overwrite(global_trace.buffer, enabled); 2916 ring_buffer_change_overwrite(global_trace.buffer, enabled);
2917#ifdef CONFIG_TRACER_MAX_TRACE
2918 ring_buffer_change_overwrite(max_tr.buffer, enabled);
2919#endif
2920 }
2899 2921
2900 if (mask == TRACE_ITER_PRINTK) 2922 if (mask == TRACE_ITER_PRINTK)
2901 trace_printk_start_stop_comm(enabled); 2923 trace_printk_start_stop_comm(enabled);
2924
2925 return 0;
2902} 2926}
2903 2927
2904static int trace_set_options(char *option) 2928static int trace_set_options(char *option)
2905{ 2929{
2906 char *cmp; 2930 char *cmp;
2907 int neg = 0; 2931 int neg = 0;
2908 int ret = 0; 2932 int ret = -ENODEV;
2909 int i; 2933 int i;
2910 2934
2911 cmp = strstrip(option); 2935 cmp = strstrip(option);
@@ -2915,19 +2939,20 @@ static int trace_set_options(char *option)
2915 cmp += 2; 2939 cmp += 2;
2916 } 2940 }
2917 2941
2942 mutex_lock(&trace_types_lock);
2943
2918 for (i = 0; trace_options[i]; i++) { 2944 for (i = 0; trace_options[i]; i++) {
2919 if (strcmp(cmp, trace_options[i]) == 0) { 2945 if (strcmp(cmp, trace_options[i]) == 0) {
2920 set_tracer_flags(1 << i, !neg); 2946 ret = set_tracer_flag(1 << i, !neg);
2921 break; 2947 break;
2922 } 2948 }
2923 } 2949 }
2924 2950
2925 /* If no option could be set, test the specific tracer options */ 2951 /* If no option could be set, test the specific tracer options */
2926 if (!trace_options[i]) { 2952 if (!trace_options[i])
2927 mutex_lock(&trace_types_lock);
2928 ret = set_tracer_option(current_trace, cmp, neg); 2953 ret = set_tracer_option(current_trace, cmp, neg);
2929 mutex_unlock(&trace_types_lock); 2954
2930 } 2955 mutex_unlock(&trace_types_lock);
2931 2956
2932 return ret; 2957 return ret;
2933} 2958}
@@ -2937,6 +2962,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2937 size_t cnt, loff_t *ppos) 2962 size_t cnt, loff_t *ppos)
2938{ 2963{
2939 char buf[64]; 2964 char buf[64];
2965 int ret;
2940 2966
2941 if (cnt >= sizeof(buf)) 2967 if (cnt >= sizeof(buf))
2942 return -EINVAL; 2968 return -EINVAL;
@@ -2946,7 +2972,9 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
2946 2972
2947 buf[cnt] = 0; 2973 buf[cnt] = 0;
2948 2974
2949 trace_set_options(buf); 2975 ret = trace_set_options(buf);
2976 if (ret < 0)
2977 return ret;
2950 2978
2951 *ppos += cnt; 2979 *ppos += cnt;
2952 2980
@@ -3250,6 +3278,9 @@ static int tracing_set_tracer(const char *buf)
3250 goto out; 3278 goto out;
3251 3279
3252 trace_branch_disable(); 3280 trace_branch_disable();
3281
3282 current_trace->enabled = false;
3283
3253 if (current_trace->reset) 3284 if (current_trace->reset)
3254 current_trace->reset(tr); 3285 current_trace->reset(tr);
3255 3286
@@ -3294,6 +3325,7 @@ static int tracing_set_tracer(const char *buf)
3294 } 3325 }
3295 3326
3296 current_trace = t; 3327 current_trace = t;
3328 current_trace->enabled = true;
3297 trace_branch_enable(tr); 3329 trace_branch_enable(tr);
3298 out: 3330 out:
3299 mutex_unlock(&trace_types_lock); 3331 mutex_unlock(&trace_types_lock);
@@ -4780,7 +4812,13 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
4780 4812
4781 if (val != 0 && val != 1) 4813 if (val != 0 && val != 1)
4782 return -EINVAL; 4814 return -EINVAL;
4783 set_tracer_flags(1 << index, val); 4815
4816 mutex_lock(&trace_types_lock);
4817 ret = set_tracer_flag(1 << index, val);
4818 mutex_unlock(&trace_types_lock);
4819
4820 if (ret < 0)
4821 return ret;
4784 4822
4785 *ppos += cnt; 4823 *ppos += cnt;
4786 4824
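Taken together, these trace.c hunks make option flipping a checked operation: set_tracer_flag() consults the current tracer's new flag_changed hook before committing, both write paths hold trace_types_lock around the update, and errors now reach userspace instead of being dropped. The veto pattern, distilled into a sketch:

/* Sketch of the veto pattern: ask the active consumer before
 * committing a global flag change. */
static int set_flag_checked(unsigned int mask, int enabled)
{
        if (!!(trace_flags & mask) == !!enabled)
                return 0;               /* already in the requested state */

        if (current_trace->flag_changed &&
            current_trace->flag_changed(current_trace, mask, !!enabled))
                return -EINVAL;         /* the tracer rejected the change */

        if (enabled)
                trace_flags |= mask;
        else
                trace_flags &= ~mask;
        return 0;
}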
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 57d7e5397d56..2081971367ea 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -283,11 +283,15 @@ struct tracer {
283 enum print_line_t (*print_line)(struct trace_iterator *iter); 283 enum print_line_t (*print_line)(struct trace_iterator *iter);
284 /* If you handled the flag setting, return 0 */ 284 /* If you handled the flag setting, return 0 */
285 int (*set_flag)(u32 old_flags, u32 bit, int set); 285 int (*set_flag)(u32 old_flags, u32 bit, int set);
286 /* Return 0 if OK with change, else return non-zero */
287 int (*flag_changed)(struct tracer *tracer,
288 u32 mask, int set);
286 struct tracer *next; 289 struct tracer *next;
287 struct tracer_flags *flags; 290 struct tracer_flags *flags;
288 bool print_max; 291 bool print_max;
289 bool use_max_tr; 292 bool use_max_tr;
290 bool allocated_snapshot; 293 bool allocated_snapshot;
294 bool enabled;
291}; 295};
292 296
293 297
@@ -943,6 +947,8 @@ extern const char *__stop___trace_bprintk_fmt[];
943 947
944void trace_printk_init_buffers(void); 948void trace_printk_init_buffers(void);
945void trace_printk_start_comm(void); 949void trace_printk_start_comm(void);
950int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set);
951int set_tracer_flag(unsigned int mask, int enabled);
946 952
947#undef FTRACE_ENTRY 953#undef FTRACE_ENTRY
948#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \ 954#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter) \
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 713a2cac4881..443b25b43b4f 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -32,7 +32,7 @@ enum {
32 32
33static int trace_type __read_mostly; 33static int trace_type __read_mostly;
34 34
35static int save_lat_flag; 35static int save_flags;
36 36
37static void stop_irqsoff_tracer(struct trace_array *tr, int graph); 37static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
38static int start_irqsoff_tracer(struct trace_array *tr, int graph); 38static int start_irqsoff_tracer(struct trace_array *tr, int graph);
@@ -558,8 +558,11 @@ static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
558 558
559static void __irqsoff_tracer_init(struct trace_array *tr) 559static void __irqsoff_tracer_init(struct trace_array *tr)
560{ 560{
561 save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; 561 save_flags = trace_flags;
562 trace_flags |= TRACE_ITER_LATENCY_FMT; 562
563 /* non overwrite screws up the latency tracers */
564 set_tracer_flag(TRACE_ITER_OVERWRITE, 1);
565 set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1);
563 566
564 tracing_max_latency = 0; 567 tracing_max_latency = 0;
565 irqsoff_trace = tr; 568 irqsoff_trace = tr;
@@ -573,10 +576,13 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
573 576
574static void irqsoff_tracer_reset(struct trace_array *tr) 577static void irqsoff_tracer_reset(struct trace_array *tr)
575{ 578{
579 int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
580 int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
581
576 stop_irqsoff_tracer(tr, is_graph()); 582 stop_irqsoff_tracer(tr, is_graph());
577 583
578 if (!save_lat_flag) 584 set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag);
579 trace_flags &= ~TRACE_ITER_LATENCY_FMT; 585 set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag);
580} 586}
581 587
582static void irqsoff_tracer_start(struct trace_array *tr) 588static void irqsoff_tracer_start(struct trace_array *tr)
@@ -609,6 +615,7 @@ static struct tracer irqsoff_tracer __read_mostly =
609 .print_line = irqsoff_print_line, 615 .print_line = irqsoff_print_line,
610 .flags = &tracer_flags, 616 .flags = &tracer_flags,
611 .set_flag = irqsoff_set_flag, 617 .set_flag = irqsoff_set_flag,
618 .flag_changed = trace_keep_overwrite,
612#ifdef CONFIG_FTRACE_SELFTEST 619#ifdef CONFIG_FTRACE_SELFTEST
613 .selftest = trace_selftest_startup_irqsoff, 620 .selftest = trace_selftest_startup_irqsoff,
614#endif 621#endif
@@ -642,6 +649,7 @@ static struct tracer preemptoff_tracer __read_mostly =
642 .print_line = irqsoff_print_line, 649 .print_line = irqsoff_print_line,
643 .flags = &tracer_flags, 650 .flags = &tracer_flags,
644 .set_flag = irqsoff_set_flag, 651 .set_flag = irqsoff_set_flag,
652 .flag_changed = trace_keep_overwrite,
645#ifdef CONFIG_FTRACE_SELFTEST 653#ifdef CONFIG_FTRACE_SELFTEST
646 .selftest = trace_selftest_startup_preemptoff, 654 .selftest = trace_selftest_startup_preemptoff,
647#endif 655#endif
@@ -677,6 +685,7 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
677 .print_line = irqsoff_print_line, 685 .print_line = irqsoff_print_line,
678 .flags = &tracer_flags, 686 .flags = &tracer_flags,
679 .set_flag = irqsoff_set_flag, 687 .set_flag = irqsoff_set_flag,
688 .flag_changed = trace_keep_overwrite,
680#ifdef CONFIG_FTRACE_SELFTEST 689#ifdef CONFIG_FTRACE_SELFTEST
681 .selftest = trace_selftest_startup_preemptirqsoff, 690 .selftest = trace_selftest_startup_preemptirqsoff,
682#endif 691#endif
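Here and in the wakeup tracer below, init/reset now snapshot the whole trace_flags word and route both bits through set_tracer_flag(), so side effects such as ring_buffer_change_overwrite() are applied consistently instead of poking trace_flags directly. The distilled save/restore pattern, as a sketch:

/* Sketch: save two flag bits at init and restore them through the
 * accessor so ring-buffer state stays in sync. */
static unsigned long saved_flags;

static void latency_flags_init(void)
{
        saved_flags = trace_flags;
        set_tracer_flag(TRACE_ITER_OVERWRITE, 1);
        set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1);
}

static void latency_flags_restore(void)
{
        set_tracer_flag(TRACE_ITER_LATENCY_FMT,
                        !!(saved_flags & TRACE_ITER_LATENCY_FMT));
        set_tracer_flag(TRACE_ITER_OVERWRITE,
                        !!(saved_flags & TRACE_ITER_OVERWRITE));
}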
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 75aa97fbe1a1..fde652c9a511 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -36,7 +36,7 @@ static void __wakeup_reset(struct trace_array *tr);
36static int wakeup_graph_entry(struct ftrace_graph_ent *trace); 36static int wakeup_graph_entry(struct ftrace_graph_ent *trace);
37static void wakeup_graph_return(struct ftrace_graph_ret *trace); 37static void wakeup_graph_return(struct ftrace_graph_ret *trace);
38 38
39static int save_lat_flag; 39static int save_flags;
40 40
41#define TRACE_DISPLAY_GRAPH 1 41#define TRACE_DISPLAY_GRAPH 1
42 42
@@ -540,8 +540,11 @@ static void stop_wakeup_tracer(struct trace_array *tr)
540 540
541static int __wakeup_tracer_init(struct trace_array *tr) 541static int __wakeup_tracer_init(struct trace_array *tr)
542{ 542{
543 save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; 543 save_flags = trace_flags;
544 trace_flags |= TRACE_ITER_LATENCY_FMT; 544
545 /* non overwrite screws up the latency tracers */
546 set_tracer_flag(TRACE_ITER_OVERWRITE, 1);
547 set_tracer_flag(TRACE_ITER_LATENCY_FMT, 1);
545 548
546 tracing_max_latency = 0; 549 tracing_max_latency = 0;
547 wakeup_trace = tr; 550 wakeup_trace = tr;
@@ -563,12 +566,15 @@ static int wakeup_rt_tracer_init(struct trace_array *tr)
563 566
564static void wakeup_tracer_reset(struct trace_array *tr) 567static void wakeup_tracer_reset(struct trace_array *tr)
565{ 568{
569 int lat_flag = save_flags & TRACE_ITER_LATENCY_FMT;
570 int overwrite_flag = save_flags & TRACE_ITER_OVERWRITE;
571
566 stop_wakeup_tracer(tr); 572 stop_wakeup_tracer(tr);
567 /* make sure we put back any tasks we are tracing */ 573 /* make sure we put back any tasks we are tracing */
568 wakeup_reset(tr); 574 wakeup_reset(tr);
569 575
570 if (!save_lat_flag) 576 set_tracer_flag(TRACE_ITER_LATENCY_FMT, lat_flag);
571 trace_flags &= ~TRACE_ITER_LATENCY_FMT; 577 set_tracer_flag(TRACE_ITER_OVERWRITE, overwrite_flag);
572} 578}
573 579
574static void wakeup_tracer_start(struct trace_array *tr) 580static void wakeup_tracer_start(struct trace_array *tr)
@@ -594,6 +600,7 @@ static struct tracer wakeup_tracer __read_mostly =
594 .print_line = wakeup_print_line, 600 .print_line = wakeup_print_line,
595 .flags = &tracer_flags, 601 .flags = &tracer_flags,
596 .set_flag = wakeup_set_flag, 602 .set_flag = wakeup_set_flag,
603 .flag_changed = trace_keep_overwrite,
597#ifdef CONFIG_FTRACE_SELFTEST 604#ifdef CONFIG_FTRACE_SELFTEST
598 .selftest = trace_selftest_startup_wakeup, 605 .selftest = trace_selftest_startup_wakeup,
599#endif 606#endif
@@ -615,6 +622,7 @@ static struct tracer wakeup_rt_tracer __read_mostly =
615 .print_line = wakeup_print_line, 622 .print_line = wakeup_print_line,
616 .flags = &tracer_flags, 623 .flags = &tracer_flags,
617 .set_flag = wakeup_set_flag, 624 .set_flag = wakeup_set_flag,
625 .flag_changed = trace_keep_overwrite,
618#ifdef CONFIG_FTRACE_SELFTEST 626#ifdef CONFIG_FTRACE_SELFTEST
619 .selftest = trace_selftest_startup_wakeup, 627 .selftest = trace_selftest_startup_wakeup,
620#endif 628#endif
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 42ca822fc701..83a8b5b7bd35 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -322,7 +322,7 @@ static const struct file_operations stack_trace_filter_fops = {
322 .open = stack_trace_filter_open, 322 .open = stack_trace_filter_open,
323 .read = seq_read, 323 .read = seq_read,
324 .write = ftrace_filter_write, 324 .write = ftrace_filter_write,
325 .llseek = ftrace_regex_lseek, 325 .llseek = ftrace_filter_lseek,
326 .release = ftrace_regex_release, 326 .release = ftrace_regex_release,
327}; 327};
328 328
diff --git a/kernel/user.c b/kernel/user.c
index e81978e8c03b..8e635a18ab52 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -51,6 +51,8 @@ struct user_namespace init_user_ns = {
51 .owner = GLOBAL_ROOT_UID, 51 .owner = GLOBAL_ROOT_UID,
52 .group = GLOBAL_ROOT_GID, 52 .group = GLOBAL_ROOT_GID,
53 .proc_inum = PROC_USER_INIT_INO, 53 .proc_inum = PROC_USER_INIT_INO,
54 .may_mount_sysfs = true,
55 .may_mount_proc = true,
54}; 56};
55EXPORT_SYMBOL_GPL(init_user_ns); 57EXPORT_SYMBOL_GPL(init_user_ns);
56 58
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 8b650837083e..e134d8f365dd 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -21,10 +21,12 @@
21#include <linux/uaccess.h> 21#include <linux/uaccess.h>
22#include <linux/ctype.h> 22#include <linux/ctype.h>
23#include <linux/projid.h> 23#include <linux/projid.h>
24#include <linux/fs_struct.h>
24 25
25static struct kmem_cache *user_ns_cachep __read_mostly; 26static struct kmem_cache *user_ns_cachep __read_mostly;
26 27
27static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, 28static bool new_idmap_permitted(const struct file *file,
29 struct user_namespace *ns, int cap_setid,
28 struct uid_gid_map *map); 30 struct uid_gid_map *map);
29 31
30static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) 32static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
@@ -60,6 +62,15 @@ int create_user_ns(struct cred *new)
60 kgid_t group = new->egid; 62 kgid_t group = new->egid;
61 int ret; 63 int ret;
62 64
65 /*
66 * Verify that we cannot violate the policy of which files
67 * may be accessed that is specified by the root directory,
68 * by verifying that the root directory is at the root of the
69 * mount namespace which allows all files to be accessed.
70 */
71 if (current_chrooted())
72 return -EPERM;
73
63 /* The creator needs a mapping in the parent user namespace 74 /* The creator needs a mapping in the parent user namespace
64 * or else we won't be able to reasonably tell userspace who 75 * or else we won't be able to reasonably tell userspace who
65 * created a user_namespace. 76 * created a user_namespace.
@@ -86,6 +97,8 @@ int create_user_ns(struct cred *new)
86 97
87 set_cred_user_ns(new, ns); 98 set_cred_user_ns(new, ns);
88 99
100 update_mnt_policy(ns);
101
89 return 0; 102 return 0;
90} 103}
91 104
@@ -600,10 +613,10 @@ static ssize_t map_write(struct file *file, const char __user *buf,
600 if (map->nr_extents != 0) 613 if (map->nr_extents != 0)
601 goto out; 614 goto out;
602 615
603 /* Require the appropriate privilege CAP_SETUID or CAP_SETGID 616 /*
604 * over the user namespace in order to set the id mapping. 617 * Adjusting namespace settings requires capabilities on the target.
605 */ 618 */
606 if (cap_valid(cap_setid) && !ns_capable(ns, cap_setid)) 619 if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN))
607 goto out; 620 goto out;
608 621
609 /* Get a buffer */ 622 /* Get a buffer */
@@ -688,7 +701,7 @@ static ssize_t map_write(struct file *file, const char __user *buf,
688 701
689 ret = -EPERM; 702 ret = -EPERM;
690 /* Validate the user is allowed to use user id's mapped to. */ 703 /* Validate the user is allowed to use user id's mapped to. */
691 if (!new_idmap_permitted(ns, cap_setid, &new_map)) 704 if (!new_idmap_permitted(file, ns, cap_setid, &new_map))
692 goto out; 705 goto out;
693 706
694 /* Map the lower ids from the parent user namespace to the 707 /* Map the lower ids from the parent user namespace to the
@@ -775,7 +788,8 @@ ssize_t proc_projid_map_write(struct file *file, const char __user *buf, size_t
775 &ns->projid_map, &ns->parent->projid_map); 788 &ns->projid_map, &ns->parent->projid_map);
776} 789}
777 790
778static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid, 791static bool new_idmap_permitted(const struct file *file,
792 struct user_namespace *ns, int cap_setid,
779 struct uid_gid_map *new_map) 793 struct uid_gid_map *new_map)
780{ 794{
781 /* Allow mapping to your own filesystem ids */ 795 /* Allow mapping to your own filesystem ids */
@@ -783,12 +797,12 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
783 u32 id = new_map->extent[0].lower_first; 797 u32 id = new_map->extent[0].lower_first;
784 if (cap_setid == CAP_SETUID) { 798 if (cap_setid == CAP_SETUID) {
785 kuid_t uid = make_kuid(ns->parent, id); 799 kuid_t uid = make_kuid(ns->parent, id);
786 if (uid_eq(uid, current_fsuid())) 800 if (uid_eq(uid, file->f_cred->fsuid))
787 return true; 801 return true;
788 } 802 }
789 else if (cap_setid == CAP_SETGID) { 803 else if (cap_setid == CAP_SETGID) {
790 kgid_t gid = make_kgid(ns->parent, id); 804 kgid_t gid = make_kgid(ns->parent, id);
791 if (gid_eq(gid, current_fsgid())) 805 if (gid_eq(gid, file->f_cred->fsgid))
792 return true; 806 return true;
793 } 807 }
794 } 808 }
@@ -799,8 +813,10 @@ static bool new_idmap_permitted(struct user_namespace *ns, int cap_setid,
799 813
800 /* Allow the specified ids if we have the appropriate capability 814 /* Allow the specified ids if we have the appropriate capability
801 * (CAP_SETUID or CAP_SETGID) over the parent user namespace. 815 * (CAP_SETUID or CAP_SETGID) over the parent user namespace.
816 * And the opener of the id file also had the appropriate capability.
802 */ 817 */
803 if (ns_capable(ns->parent, cap_setid)) 818 if (ns_capable(ns->parent, cap_setid) &&
819 file_ns_capable(file, ns->parent, cap_setid))
804 return true; 820 return true;
805 821
806 return false; 822 return false;
@@ -837,6 +853,9 @@ static int userns_install(struct nsproxy *nsproxy, void *ns)
837 if (atomic_read(&current->mm->mm_users) > 1) 853 if (atomic_read(&current->mm->mm_users) > 1)
838 return -EINVAL; 854 return -EINVAL;
839 855
856 if (current->fs->users != 1)
857 return -EINVAL;
858
840 if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 859 if (!ns_capable(user_ns, CAP_SYS_ADMIN))
841 return -EPERM; 860 return -EPERM;
842 861
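The user_namespace.c changes shift the authorization question from "is the writer privileged?" to "was the opener privileged?": map_write() and new_idmap_permitted() now check file->f_cred, so a privileged process (a setuid helper, say) cannot be tricked into updating a mapping through a descriptor that an unprivileged task opened and handed over. A sketch of the principle (map_write_allowed is a hypothetical helper, not kernel API):

#include <linux/capability.h>
#include <linux/fs.h>

/* Hypothetical helper: authorize against the credentials captured at
 * open time, never against the caller of write(). */
static bool map_write_allowed(const struct file *file,
                              struct user_namespace *ns)
{
        return file_ns_capable(file, ns, CAP_SYS_ADMIN);
}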
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 81f2457811eb..b48cd597145d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -457,11 +457,12 @@ static int worker_pool_assign_id(struct worker_pool *pool)
457 int ret; 457 int ret;
458 458
459 mutex_lock(&worker_pool_idr_mutex); 459 mutex_lock(&worker_pool_idr_mutex);
460 idr_pre_get(&worker_pool_idr, GFP_KERNEL); 460 ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL);
461 ret = idr_get_new(&worker_pool_idr, pool, &pool->id); 461 if (ret >= 0)
462 pool->id = ret;
462 mutex_unlock(&worker_pool_idr_mutex); 463 mutex_unlock(&worker_pool_idr_mutex);
463 464
464 return ret; 465 return ret < 0 ? ret : 0;
465} 466}
466 467
467/* 468/*
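The worker_pool_assign_id() hunk above is part of the tree-wide idr API conversion: the two-step idr_pre_get() plus idr_get_new() dance is replaced by idr_alloc(), which takes the id range and gfp mask directly and returns the new id or a negative errno. The new idiom, as a kernel-style sketch:

#include <linux/idr.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(pool_idr_mutex);
static DEFINE_IDR(pool_idr);

static int pool_assign_id(struct worker_pool *pool)
{
        int id;

        mutex_lock(&pool_idr_mutex);
        /* start = 0, end = 0: any non-negative id will do. */
        id = idr_alloc(&pool_idr, pool, 0, 0, GFP_KERNEL);
        if (id >= 0)
                pool->id = id;
        mutex_unlock(&pool_idr_mutex);

        return id < 0 ? id : 0;
}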
@@ -3446,28 +3447,34 @@ static void wq_unbind_fn(struct work_struct *work)
3446 3447
3447 spin_unlock_irq(&pool->lock); 3448 spin_unlock_irq(&pool->lock);
3448 mutex_unlock(&pool->assoc_mutex); 3449 mutex_unlock(&pool->assoc_mutex);
3449 }
3450 3450
3451 /* 3451 /*
3452 * Call schedule() so that we cross rq->lock and thus can guarantee 3452 * Call schedule() so that we cross rq->lock and thus can
3453 * sched callbacks see the %WORKER_UNBOUND flag. This is necessary 3453 * guarantee sched callbacks see the %WORKER_UNBOUND flag.
3454 * as scheduler callbacks may be invoked from other cpus. 3454 * This is necessary as scheduler callbacks may be invoked
3455 */ 3455 * from other cpus.
3456 schedule(); 3456 */
3457 schedule();
3457 3458
3458 /* 3459 /*
3459 * Sched callbacks are disabled now. Zap nr_running. After this, 3460 * Sched callbacks are disabled now. Zap nr_running.
3460 * nr_running stays zero and need_more_worker() and keep_working() 3461 * After this, nr_running stays zero and need_more_worker()
3461 * are always true as long as the worklist is not empty. Pools on 3462 * and keep_working() are always true as long as the
3462 * @cpu now behave as unbound (in terms of concurrency management) 3463 * worklist is not empty. This pool now behaves as an
3463 * pools which are served by workers tied to the CPU. 3464 * is served by workers tied to the pool.
3464 * 3465 * are served by workers tied to the pool.
3465 * On return from this function, the current worker would trigger 3466 */
3466 * unbound chain execution of pending work items if other workers
3467 * didn't already.
3468 */
3469 for_each_std_worker_pool(pool, cpu)
3470 atomic_set(&pool->nr_running, 0); 3467 atomic_set(&pool->nr_running, 0);
3468
3469 /*
3470 * With concurrency management just turned off, a busy
3471 * worker blocking could lead to lengthy stalls. Kick off
3472 * unbound chain execution of currently pending work items.
3473 */
3474 spin_lock_irq(&pool->lock);
3475 wake_up_worker(pool);
3476 spin_unlock_irq(&pool->lock);
3477 }
3471} 3478}
3472 3479
3473/* 3480/*
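The wq_unbind_fn() restructuring fixes an ordering problem: the nr_running zap used to happen in a second loop after the per-pool locks were dropped, and nothing woke the pool's workers afterwards, so already-queued work could stall once concurrency management was switched off. The reordered tail of the per-pool loop, condensed into a sketch:

/* Condensed sketch of the per-pool shutdown tail after the patch. */
schedule();                        /* cross rq->lock so sched callbacks
                                    * observe WORKER_UNBOUND */
atomic_set(&pool->nr_running, 0);  /* concurrency management is now off */

spin_lock_irq(&pool->lock);
wake_up_worker(pool);              /* kick pending work items */
spin_unlock_irq(&pool->lock);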