aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/capability.c2
-rw-r--r--kernel/cgroup.c19
-rw-r--r--kernel/cred.c16
-rw-r--r--kernel/fork.c41
-rw-r--r--kernel/futex.c62
-rw-r--r--kernel/irq/Kconfig3
-rw-r--r--kernel/irq/handle.c111
-rw-r--r--kernel/irq/irqdesc.c40
-rw-r--r--kernel/irq/migration.c14
-rw-r--r--kernel/lockdep.c18
-rw-r--r--kernel/module.c16
-rw-r--r--kernel/panic.c1
-rw-r--r--kernel/params.c65
-rw-r--r--kernel/perf_event.c79
-rw-r--r--kernel/power/Kconfig5
-rw-r--r--kernel/power/Makefile1
-rw-r--r--kernel/power/nvs.c136
-rw-r--r--kernel/printk.c154
-rw-r--r--kernel/ptrace.c2
-rw-r--r--kernel/rcutiny.c3
-rw-r--r--kernel/sched.c26
-rw-r--r--kernel/sched_autogroup.c32
-rw-r--r--kernel/sched_autogroup.h4
-rw-r--r--kernel/sched_debug.c42
-rw-r--r--kernel/sched_fair.c126
-rw-r--r--kernel/sched_rt.c2
-rw-r--r--kernel/smp.c62
-rw-r--r--kernel/srcu.c15
-rw-r--r--kernel/sys.c3
-rw-r--r--kernel/sysctl.c3
-rw-r--r--kernel/time/clocksource.c8
-rw-r--r--kernel/time/tick-sched.c7
-rw-r--r--kernel/time/timekeeping.c4
-rw-r--r--kernel/timer.c6
-rw-r--r--kernel/trace/blktrace.c7
-rw-r--r--kernel/trace/trace_events.c12
-rw-r--r--kernel/trace/trace_export.c6
-rw-r--r--kernel/trace/trace_irqsoff.c8
-rw-r--r--kernel/trace/trace_syscalls.c52
-rw-r--r--kernel/tracepoint.c31
-rw-r--r--kernel/watchdog.c43
-rw-r--r--kernel/workqueue.c20
42 files changed, 681 insertions, 626 deletions
diff --git a/kernel/capability.c b/kernel/capability.c
index 2f05303715a5..9e9385f132c8 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -306,7 +306,7 @@ int capable(int cap)
306 BUG(); 306 BUG();
307 } 307 }
308 308
309 if (security_capable(cap) == 0) { 309 if (security_capable(current_cred(), cap) == 0) {
310 current->flags |= PF_SUPERPRIV; 310 current->flags |= PF_SUPERPRIV;
311 return 1; 311 return 1;
312 } 312 }
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5c5f4cc2e99a..b24d7027b83c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock);
764 */ 764 */
765 765
766static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); 766static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode);
767static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *);
767static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); 768static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry);
768static int cgroup_populate_dir(struct cgroup *cgrp); 769static int cgroup_populate_dir(struct cgroup *cgrp);
769static const struct inode_operations cgroup_dir_inode_operations; 770static const struct inode_operations cgroup_dir_inode_operations;
@@ -860,6 +861,11 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
860 iput(inode); 861 iput(inode);
861} 862}
862 863
864static int cgroup_delete(const struct dentry *d)
865{
866 return 1;
867}
868
863static void remove_dir(struct dentry *d) 869static void remove_dir(struct dentry *d)
864{ 870{
865 struct dentry *parent = dget(d->d_parent); 871 struct dentry *parent = dget(d->d_parent);
@@ -910,7 +916,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
910 916
911 parent = dentry->d_parent; 917 parent = dentry->d_parent;
912 spin_lock(&parent->d_lock); 918 spin_lock(&parent->d_lock);
913 spin_lock(&dentry->d_lock); 919 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
914 list_del_init(&dentry->d_u.d_child); 920 list_del_init(&dentry->d_u.d_child);
915 spin_unlock(&dentry->d_lock); 921 spin_unlock(&dentry->d_lock);
916 spin_unlock(&parent->d_lock); 922 spin_unlock(&parent->d_lock);
@@ -1451,6 +1457,7 @@ static int cgroup_get_rootdir(struct super_block *sb)
1451{ 1457{
1452 static const struct dentry_operations cgroup_dops = { 1458 static const struct dentry_operations cgroup_dops = {
1453 .d_iput = cgroup_diput, 1459 .d_iput = cgroup_diput,
1460 .d_delete = cgroup_delete,
1454 }; 1461 };
1455 1462
1456 struct inode *inode = 1463 struct inode *inode =
@@ -2195,12 +2202,20 @@ static const struct file_operations cgroup_file_operations = {
2195}; 2202};
2196 2203
2197static const struct inode_operations cgroup_dir_inode_operations = { 2204static const struct inode_operations cgroup_dir_inode_operations = {
2198 .lookup = simple_lookup, 2205 .lookup = cgroup_lookup,
2199 .mkdir = cgroup_mkdir, 2206 .mkdir = cgroup_mkdir,
2200 .rmdir = cgroup_rmdir, 2207 .rmdir = cgroup_rmdir,
2201 .rename = cgroup_rename, 2208 .rename = cgroup_rename,
2202}; 2209};
2203 2210
2211static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
2212{
2213 if (dentry->d_name.len > NAME_MAX)
2214 return ERR_PTR(-ENAMETOOLONG);
2215 d_add(dentry, NULL);
2216 return NULL;
2217}
2218
2204/* 2219/*
2205 * Check if a file is a control file 2220 * Check if a file is a control file
2206 */ 2221 */
diff --git a/kernel/cred.c b/kernel/cred.c
index 6a1aa004e376..3a9d6dd53a6c 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -252,13 +252,13 @@ struct cred *cred_alloc_blank(void)
252#endif 252#endif
253 253
254 atomic_set(&new->usage, 1); 254 atomic_set(&new->usage, 1);
255#ifdef CONFIG_DEBUG_CREDENTIALS
256 new->magic = CRED_MAGIC;
257#endif
255 258
256 if (security_cred_alloc_blank(new, GFP_KERNEL) < 0) 259 if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
257 goto error; 260 goto error;
258 261
259#ifdef CONFIG_DEBUG_CREDENTIALS
260 new->magic = CRED_MAGIC;
261#endif
262 return new; 262 return new;
263 263
264error: 264error:
@@ -657,6 +657,8 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
657 validate_creds(old); 657 validate_creds(old);
658 658
659 *new = *old; 659 *new = *old;
660 atomic_set(&new->usage, 1);
661 set_cred_subscribers(new, 0);
660 get_uid(new->user); 662 get_uid(new->user);
661 get_group_info(new->group_info); 663 get_group_info(new->group_info);
662 664
@@ -674,8 +676,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
674 if (security_prepare_creds(new, old, GFP_KERNEL) < 0) 676 if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
675 goto error; 677 goto error;
676 678
677 atomic_set(&new->usage, 1);
678 set_cred_subscribers(new, 0);
679 put_cred(old); 679 put_cred(old);
680 validate_creds(new); 680 validate_creds(new);
681 return new; 681 return new;
@@ -748,7 +748,11 @@ bool creds_are_invalid(const struct cred *cred)
748 if (cred->magic != CRED_MAGIC) 748 if (cred->magic != CRED_MAGIC)
749 return true; 749 return true;
750#ifdef CONFIG_SECURITY_SELINUX 750#ifdef CONFIG_SECURITY_SELINUX
751 if (selinux_is_enabled()) { 751 /*
752 * cred->security == NULL if security_cred_alloc_blank() or
753 * security_prepare_creds() returned an error.
754 */
755 if (selinux_is_enabled() && cred->security) {
752 if ((unsigned long) cred->security < PAGE_SIZE) 756 if ((unsigned long) cred->security < PAGE_SIZE)
753 return true; 757 return true;
754 if ((*(u32 *)cred->security & 0xffffff00) == 758 if ((*(u32 *)cred->security & 0xffffff00) ==
diff --git a/kernel/fork.c b/kernel/fork.c
index d9b44f20b6b0..25e429152ddc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -66,6 +66,7 @@
66#include <linux/posix-timers.h> 66#include <linux/posix-timers.h>
67#include <linux/user-return-notifier.h> 67#include <linux/user-return-notifier.h>
68#include <linux/oom.h> 68#include <linux/oom.h>
69#include <linux/khugepaged.h>
69 70
70#include <asm/pgtable.h> 71#include <asm/pgtable.h>
71#include <asm/pgalloc.h> 72#include <asm/pgalloc.h>
@@ -330,6 +331,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
330 retval = ksm_fork(mm, oldmm); 331 retval = ksm_fork(mm, oldmm);
331 if (retval) 332 if (retval)
332 goto out; 333 goto out;
334 retval = khugepaged_fork(mm, oldmm);
335 if (retval)
336 goto out;
333 337
334 prev = NULL; 338 prev = NULL;
335 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { 339 for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
@@ -529,6 +533,9 @@ void __mmdrop(struct mm_struct *mm)
529 mm_free_pgd(mm); 533 mm_free_pgd(mm);
530 destroy_context(mm); 534 destroy_context(mm);
531 mmu_notifier_mm_destroy(mm); 535 mmu_notifier_mm_destroy(mm);
536#ifdef CONFIG_TRANSPARENT_HUGEPAGE
537 VM_BUG_ON(mm->pmd_huge_pte);
538#endif
532 free_mm(mm); 539 free_mm(mm);
533} 540}
534EXPORT_SYMBOL_GPL(__mmdrop); 541EXPORT_SYMBOL_GPL(__mmdrop);
@@ -543,6 +550,7 @@ void mmput(struct mm_struct *mm)
543 if (atomic_dec_and_test(&mm->mm_users)) { 550 if (atomic_dec_and_test(&mm->mm_users)) {
544 exit_aio(mm); 551 exit_aio(mm);
545 ksm_exit(mm); 552 ksm_exit(mm);
553 khugepaged_exit(mm); /* must run before exit_mmap */
546 exit_mmap(mm); 554 exit_mmap(mm);
547 set_mm_exe_file(mm, NULL); 555 set_mm_exe_file(mm, NULL);
548 if (!list_empty(&mm->mmlist)) { 556 if (!list_empty(&mm->mmlist)) {
@@ -669,6 +677,10 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
669 mm->token_priority = 0; 677 mm->token_priority = 0;
670 mm->last_interval = 0; 678 mm->last_interval = 0;
671 679
680#ifdef CONFIG_TRANSPARENT_HUGEPAGE
681 mm->pmd_huge_pte = NULL;
682#endif
683
672 if (!mm_init(mm, tsk)) 684 if (!mm_init(mm, tsk))
673 goto fail_nomem; 685 goto fail_nomem;
674 686
@@ -910,6 +922,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
910 922
911 sig->oom_adj = current->signal->oom_adj; 923 sig->oom_adj = current->signal->oom_adj;
912 sig->oom_score_adj = current->signal->oom_score_adj; 924 sig->oom_score_adj = current->signal->oom_score_adj;
925 sig->oom_score_adj_min = current->signal->oom_score_adj_min;
913 926
914 mutex_init(&sig->cred_guard_mutex); 927 mutex_init(&sig->cred_guard_mutex);
915 928
@@ -1410,23 +1423,6 @@ long do_fork(unsigned long clone_flags,
1410 } 1423 }
1411 1424
1412 /* 1425 /*
1413 * We hope to recycle these flags after 2.6.26
1414 */
1415 if (unlikely(clone_flags & CLONE_STOPPED)) {
1416 static int __read_mostly count = 100;
1417
1418 if (count > 0 && printk_ratelimit()) {
1419 char comm[TASK_COMM_LEN];
1420
1421 count--;
1422 printk(KERN_INFO "fork(): process `%s' used deprecated "
1423 "clone flags 0x%lx\n",
1424 get_task_comm(comm, current),
1425 clone_flags & CLONE_STOPPED);
1426 }
1427 }
1428
1429 /*
1430 * When called from kernel_thread, don't do user tracing stuff. 1426 * When called from kernel_thread, don't do user tracing stuff.
1431 */ 1427 */
1432 if (likely(user_mode(regs))) 1428 if (likely(user_mode(regs)))
@@ -1464,16 +1460,7 @@ long do_fork(unsigned long clone_flags,
1464 */ 1460 */
1465 p->flags &= ~PF_STARTING; 1461 p->flags &= ~PF_STARTING;
1466 1462
1467 if (unlikely(clone_flags & CLONE_STOPPED)) { 1463 wake_up_new_task(p, clone_flags);
1468 /*
1469 * We'll start up with an immediate SIGSTOP.
1470 */
1471 sigaddset(&p->pending.signal, SIGSTOP);
1472 set_tsk_thread_flag(p, TIF_SIGPENDING);
1473 __set_task_state(p, TASK_STOPPED);
1474 } else {
1475 wake_up_new_task(p, clone_flags);
1476 }
1477 1464
1478 tracehook_report_clone_complete(trace, regs, 1465 tracehook_report_clone_complete(trace, regs,
1479 clone_flags, nr, p); 1466 clone_flags, nr, p);
diff --git a/kernel/futex.c b/kernel/futex.c
index 3019b92e6917..b766d28accd6 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -233,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
233{ 233{
234 unsigned long address = (unsigned long)uaddr; 234 unsigned long address = (unsigned long)uaddr;
235 struct mm_struct *mm = current->mm; 235 struct mm_struct *mm = current->mm;
236 struct page *page; 236 struct page *page, *page_head;
237 int err; 237 int err;
238 238
239 /* 239 /*
@@ -265,11 +265,46 @@ again:
265 if (err < 0) 265 if (err < 0)
266 return err; 266 return err;
267 267
268 page = compound_head(page); 268#ifdef CONFIG_TRANSPARENT_HUGEPAGE
269 lock_page(page); 269 page_head = page;
270 if (!page->mapping) { 270 if (unlikely(PageTail(page))) {
271 unlock_page(page);
272 put_page(page); 271 put_page(page);
272 /* serialize against __split_huge_page_splitting() */
273 local_irq_disable();
274 if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
275 page_head = compound_head(page);
276 /*
277 * page_head is valid pointer but we must pin
278 * it before taking the PG_lock and/or
279 * PG_compound_lock. The moment we re-enable
280 * irqs __split_huge_page_splitting() can
281 * return and the head page can be freed from
282 * under us. We can't take the PG_lock and/or
283 * PG_compound_lock on a page that could be
284 * freed from under us.
285 */
286 if (page != page_head) {
287 get_page(page_head);
288 put_page(page);
289 }
290 local_irq_enable();
291 } else {
292 local_irq_enable();
293 goto again;
294 }
295 }
296#else
297 page_head = compound_head(page);
298 if (page != page_head) {
299 get_page(page_head);
300 put_page(page);
301 }
302#endif
303
304 lock_page(page_head);
305 if (!page_head->mapping) {
306 unlock_page(page_head);
307 put_page(page_head);
273 goto again; 308 goto again;
274 } 309 }
275 310
@@ -280,20 +315,20 @@ again:
280 * it's a read-only handle, it's expected that futexes attach to 315 * it's a read-only handle, it's expected that futexes attach to
281 * the object not the particular process. 316 * the object not the particular process.
282 */ 317 */
283 if (PageAnon(page)) { 318 if (PageAnon(page_head)) {
284 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ 319 key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
285 key->private.mm = mm; 320 key->private.mm = mm;
286 key->private.address = address; 321 key->private.address = address;
287 } else { 322 } else {
288 key->both.offset |= FUT_OFF_INODE; /* inode-based key */ 323 key->both.offset |= FUT_OFF_INODE; /* inode-based key */
289 key->shared.inode = page->mapping->host; 324 key->shared.inode = page_head->mapping->host;
290 key->shared.pgoff = page->index; 325 key->shared.pgoff = page_head->index;
291 } 326 }
292 327
293 get_futex_key_refs(key); 328 get_futex_key_refs(key);
294 329
295 unlock_page(page); 330 unlock_page(page_head);
296 put_page(page); 331 put_page(page_head);
297 return 0; 332 return 0;
298} 333}
299 334
@@ -791,10 +826,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
791 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); 826 new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
792 827
793 /* 828 /*
794 * This happens when we have stolen the lock and the original 829 * It is possible that the next waiter (the one that brought
795 * pending owner did not enqueue itself back on the rt_mutex. 830 * this owner to the kernel) timed out and is no longer
796 * Thats not a tragedy. We know that way, that a lock waiter 831 * waiting on the lock.
797 * is on the fly. We make the futex_q waiter the pending owner.
798 */ 832 */
799 if (!new_owner) 833 if (!new_owner)
800 new_owner = this->task; 834 new_owner = this->task;
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 31d766bf5d2e..8e42fec7686d 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -9,9 +9,6 @@ menu "IRQ subsystem"
9config GENERIC_HARDIRQS 9config GENERIC_HARDIRQS
10 def_bool y 10 def_bool y
11 11
12config GENERIC_HARDIRQS_NO__DO_IRQ
13 def_bool y
14
15# Select this to disable the deprecated stuff 12# Select this to disable the deprecated stuff
16config GENERIC_HARDIRQS_NO_DEPRECATED 13config GENERIC_HARDIRQS_NO_DEPRECATED
17 def_bool n 14 def_bool n
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e2347eb63306..3540a7190122 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -118,114 +118,3 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
118 118
119 return retval; 119 return retval;
120} 120}
121
122#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ
123
124#ifdef CONFIG_ENABLE_WARN_DEPRECATED
125# warning __do_IRQ is deprecated. Please convert to proper flow handlers
126#endif
127
128/**
129 * __do_IRQ - original all in one highlevel IRQ handler
130 * @irq: the interrupt number
131 *
132 * __do_IRQ handles all normal device IRQ's (the special
133 * SMP cross-CPU interrupts have their own specific
134 * handlers).
135 *
136 * This is the original x86 implementation which is used for every
137 * interrupt type.
138 */
139unsigned int __do_IRQ(unsigned int irq)
140{
141 struct irq_desc *desc = irq_to_desc(irq);
142 struct irqaction *action;
143 unsigned int status;
144
145 kstat_incr_irqs_this_cpu(irq, desc);
146
147 if (CHECK_IRQ_PER_CPU(desc->status)) {
148 irqreturn_t action_ret;
149
150 /*
151 * No locking required for CPU-local interrupts:
152 */
153 if (desc->irq_data.chip->ack)
154 desc->irq_data.chip->ack(irq);
155 if (likely(!(desc->status & IRQ_DISABLED))) {
156 action_ret = handle_IRQ_event(irq, desc->action);
157 if (!noirqdebug)
158 note_interrupt(irq, desc, action_ret);
159 }
160 desc->irq_data.chip->end(irq);
161 return 1;
162 }
163
164 raw_spin_lock(&desc->lock);
165 if (desc->irq_data.chip->ack)
166 desc->irq_data.chip->ack(irq);
167 /*
168 * REPLAY is when Linux resends an IRQ that was dropped earlier
169 * WAITING is used by probe to mark irqs that are being tested
170 */
171 status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING);
172 status |= IRQ_PENDING; /* we _want_ to handle it */
173
174 /*
175 * If the IRQ is disabled for whatever reason, we cannot
176 * use the action we have.
177 */
178 action = NULL;
179 if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) {
180 action = desc->action;
181 status &= ~IRQ_PENDING; /* we commit to handling */
182 status |= IRQ_INPROGRESS; /* we are handling it */
183 }
184 desc->status = status;
185
186 /*
187 * If there is no IRQ handler or it was disabled, exit early.
188 * Since we set PENDING, if another processor is handling
189 * a different instance of this same irq, the other processor
190 * will take care of it.
191 */
192 if (unlikely(!action))
193 goto out;
194
195 /*
196 * Edge triggered interrupts need to remember
197 * pending events.
198 * This applies to any hw interrupts that allow a second
199 * instance of the same irq to arrive while we are in do_IRQ
200 * or in the handler. But the code here only handles the _second_
201 * instance of the irq, not the third or fourth. So it is mostly
202 * useful for irq hardware that does not mask cleanly in an
203 * SMP environment.
204 */
205 for (;;) {
206 irqreturn_t action_ret;
207
208 raw_spin_unlock(&desc->lock);
209
210 action_ret = handle_IRQ_event(irq, action);
211 if (!noirqdebug)
212 note_interrupt(irq, desc, action_ret);
213
214 raw_spin_lock(&desc->lock);
215 if (likely(!(desc->status & IRQ_PENDING)))
216 break;
217 desc->status &= ~IRQ_PENDING;
218 }
219 desc->status &= ~IRQ_INPROGRESS;
220
221out:
222 /*
223 * The ->end() handler has to deal with interrupts which got
224 * disabled while the handler was running.
225 */
226 desc->irq_data.chip->end(irq);
227 raw_spin_unlock(&desc->lock);
228
229 return 1;
230}
231#endif
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 9988d03797f5..282f20230e67 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -72,6 +72,8 @@ static inline int desc_node(struct irq_desc *desc) { return 0; }
72 72
73static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) 73static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
74{ 74{
75 int cpu;
76
75 desc->irq_data.irq = irq; 77 desc->irq_data.irq = irq;
76 desc->irq_data.chip = &no_irq_chip; 78 desc->irq_data.chip = &no_irq_chip;
77 desc->irq_data.chip_data = NULL; 79 desc->irq_data.chip_data = NULL;
@@ -83,7 +85,8 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
83 desc->irq_count = 0; 85 desc->irq_count = 0;
84 desc->irqs_unhandled = 0; 86 desc->irqs_unhandled = 0;
85 desc->name = NULL; 87 desc->name = NULL;
86 memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); 88 for_each_possible_cpu(cpu)
89 *per_cpu_ptr(desc->kstat_irqs, cpu) = 0;
87 desc_smp_init(desc, node); 90 desc_smp_init(desc, node);
88} 91}
89 92
@@ -133,8 +136,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
133 if (!desc) 136 if (!desc)
134 return NULL; 137 return NULL;
135 /* allocate based on nr_cpu_ids */ 138 /* allocate based on nr_cpu_ids */
136 desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs), 139 desc->kstat_irqs = alloc_percpu(unsigned int);
137 gfp, node);
138 if (!desc->kstat_irqs) 140 if (!desc->kstat_irqs)
139 goto err_desc; 141 goto err_desc;
140 142
@@ -149,7 +151,7 @@ static struct irq_desc *alloc_desc(int irq, int node)
149 return desc; 151 return desc;
150 152
151err_kstat: 153err_kstat:
152 kfree(desc->kstat_irqs); 154 free_percpu(desc->kstat_irqs);
153err_desc: 155err_desc:
154 kfree(desc); 156 kfree(desc);
155 return NULL; 157 return NULL;
@@ -166,7 +168,7 @@ static void free_desc(unsigned int irq)
166 mutex_unlock(&sparse_irq_lock); 168 mutex_unlock(&sparse_irq_lock);
167 169
168 free_masks(desc); 170 free_masks(desc);
169 kfree(desc->kstat_irqs); 171 free_percpu(desc->kstat_irqs);
170 kfree(desc); 172 kfree(desc);
171} 173}
172 174
@@ -234,7 +236,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
234 } 236 }
235}; 237};
236 238
237static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS];
238int __init early_irq_init(void) 239int __init early_irq_init(void)
239{ 240{
240 int count, i, node = first_online_node; 241 int count, i, node = first_online_node;
@@ -250,7 +251,8 @@ int __init early_irq_init(void)
250 for (i = 0; i < count; i++) { 251 for (i = 0; i < count; i++) {
251 desc[i].irq_data.irq = i; 252 desc[i].irq_data.irq = i;
252 desc[i].irq_data.chip = &no_irq_chip; 253 desc[i].irq_data.chip = &no_irq_chip;
253 desc[i].kstat_irqs = kstat_irqs_all[i]; 254 /* TODO : do this allocation on-demand ... */
255 desc[i].kstat_irqs = alloc_percpu(unsigned int);
254 alloc_masks(desc + i, GFP_KERNEL, node); 256 alloc_masks(desc + i, GFP_KERNEL, node);
255 desc_smp_init(desc + i, node); 257 desc_smp_init(desc + i, node);
256 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); 258 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
@@ -275,6 +277,22 @@ static void free_desc(unsigned int irq)
275 277
276static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) 278static inline int alloc_descs(unsigned int start, unsigned int cnt, int node)
277{ 279{
280#if defined(CONFIG_KSTAT_IRQS_ONDEMAND)
281 struct irq_desc *desc;
282 unsigned int i;
283
284 for (i = 0; i < cnt; i++) {
285 desc = irq_to_desc(start + i);
286 if (desc && !desc->kstat_irqs) {
287 unsigned int __percpu *stats = alloc_percpu(unsigned int);
288
289 if (!stats)
290 return -1;
291 if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL)
292 free_percpu(stats);
293 }
294 }
295#endif
278 return start; 296 return start;
279} 297}
280#endif /* !CONFIG_SPARSE_IRQ */ 298#endif /* !CONFIG_SPARSE_IRQ */
@@ -391,7 +409,9 @@ void dynamic_irq_cleanup(unsigned int irq)
391unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) 409unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
392{ 410{
393 struct irq_desc *desc = irq_to_desc(irq); 411 struct irq_desc *desc = irq_to_desc(irq);
394 return desc ? desc->kstat_irqs[cpu] : 0; 412
413 return desc && desc->kstat_irqs ?
414 *per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
395} 415}
396 416
397#ifdef CONFIG_GENERIC_HARDIRQS 417#ifdef CONFIG_GENERIC_HARDIRQS
@@ -401,10 +421,10 @@ unsigned int kstat_irqs(unsigned int irq)
401 int cpu; 421 int cpu;
402 int sum = 0; 422 int sum = 0;
403 423
404 if (!desc) 424 if (!desc || !desc->kstat_irqs)
405 return 0; 425 return 0;
406 for_each_possible_cpu(cpu) 426 for_each_possible_cpu(cpu)
407 sum += desc->kstat_irqs[cpu]; 427 sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
408 return sum; 428 return sum;
409} 429}
410#endif /* CONFIG_GENERIC_HARDIRQS */ 430#endif /* CONFIG_GENERIC_HARDIRQS */
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index 1d2541940480..441fd629ff04 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -56,6 +56,7 @@ void move_masked_irq(int irq)
56void move_native_irq(int irq) 56void move_native_irq(int irq)
57{ 57{
58 struct irq_desc *desc = irq_to_desc(irq); 58 struct irq_desc *desc = irq_to_desc(irq);
59 bool masked;
59 60
60 if (likely(!(desc->status & IRQ_MOVE_PENDING))) 61 if (likely(!(desc->status & IRQ_MOVE_PENDING)))
61 return; 62 return;
@@ -63,8 +64,15 @@ void move_native_irq(int irq)
63 if (unlikely(desc->status & IRQ_DISABLED)) 64 if (unlikely(desc->status & IRQ_DISABLED))
64 return; 65 return;
65 66
66 desc->irq_data.chip->irq_mask(&desc->irq_data); 67 /*
68 * Be careful vs. already masked interrupts. If this is a
69 * threaded interrupt with ONESHOT set, we can end up with an
70 * interrupt storm.
71 */
72 masked = desc->status & IRQ_MASKED;
73 if (!masked)
74 desc->irq_data.chip->irq_mask(&desc->irq_data);
67 move_masked_irq(irq); 75 move_masked_irq(irq);
68 desc->irq_data.chip->irq_unmask(&desc->irq_data); 76 if (!masked)
77 desc->irq_data.chip->irq_unmask(&desc->irq_data);
69} 78}
70
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 42ba65dff7d9..0d2058da80f5 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2292,22 +2292,6 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark)
2292} 2292}
2293 2293
2294/* 2294/*
2295 * Debugging helper: via this flag we know that we are in
2296 * 'early bootup code', and will warn about any invalid irqs-on event:
2297 */
2298static int early_boot_irqs_enabled;
2299
2300void early_boot_irqs_off(void)
2301{
2302 early_boot_irqs_enabled = 0;
2303}
2304
2305void early_boot_irqs_on(void)
2306{
2307 early_boot_irqs_enabled = 1;
2308}
2309
2310/*
2311 * Hardirqs will be enabled: 2295 * Hardirqs will be enabled:
2312 */ 2296 */
2313void trace_hardirqs_on_caller(unsigned long ip) 2297void trace_hardirqs_on_caller(unsigned long ip)
@@ -2319,7 +2303,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
2319 if (unlikely(!debug_locks || current->lockdep_recursion)) 2303 if (unlikely(!debug_locks || current->lockdep_recursion))
2320 return; 2304 return;
2321 2305
2322 if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled))) 2306 if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
2323 return; 2307 return;
2324 2308
2325 if (unlikely(curr->hardirqs_enabled)) { 2309 if (unlikely(curr->hardirqs_enabled)) {
diff --git a/kernel/module.c b/kernel/module.c
index 34e00b708fad..efa290ea94bf 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2460,9 +2460,9 @@ static void find_module_sections(struct module *mod, struct load_info *info)
2460#endif 2460#endif
2461 2461
2462#ifdef CONFIG_TRACEPOINTS 2462#ifdef CONFIG_TRACEPOINTS
2463 mod->tracepoints = section_objs(info, "__tracepoints", 2463 mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs",
2464 sizeof(*mod->tracepoints), 2464 sizeof(*mod->tracepoints_ptrs),
2465 &mod->num_tracepoints); 2465 &mod->num_tracepoints);
2466#endif 2466#endif
2467#ifdef HAVE_JUMP_LABEL 2467#ifdef HAVE_JUMP_LABEL
2468 mod->jump_entries = section_objs(info, "__jump_table", 2468 mod->jump_entries = section_objs(info, "__jump_table",
@@ -3393,7 +3393,7 @@ void module_layout(struct module *mod,
3393 struct modversion_info *ver, 3393 struct modversion_info *ver,
3394 struct kernel_param *kp, 3394 struct kernel_param *kp,
3395 struct kernel_symbol *ks, 3395 struct kernel_symbol *ks,
3396 struct tracepoint *tp) 3396 struct tracepoint * const *tp)
3397{ 3397{
3398} 3398}
3399EXPORT_SYMBOL(module_layout); 3399EXPORT_SYMBOL(module_layout);
@@ -3407,8 +3407,8 @@ void module_update_tracepoints(void)
3407 mutex_lock(&module_mutex); 3407 mutex_lock(&module_mutex);
3408 list_for_each_entry(mod, &modules, list) 3408 list_for_each_entry(mod, &modules, list)
3409 if (!mod->taints) 3409 if (!mod->taints)
3410 tracepoint_update_probe_range(mod->tracepoints, 3410 tracepoint_update_probe_range(mod->tracepoints_ptrs,
3411 mod->tracepoints + mod->num_tracepoints); 3411 mod->tracepoints_ptrs + mod->num_tracepoints);
3412 mutex_unlock(&module_mutex); 3412 mutex_unlock(&module_mutex);
3413} 3413}
3414 3414
@@ -3432,8 +3432,8 @@ int module_get_iter_tracepoints(struct tracepoint_iter *iter)
3432 else if (iter_mod > iter->module) 3432 else if (iter_mod > iter->module)
3433 iter->tracepoint = NULL; 3433 iter->tracepoint = NULL;
3434 found = tracepoint_get_iter_range(&iter->tracepoint, 3434 found = tracepoint_get_iter_range(&iter->tracepoint,
3435 iter_mod->tracepoints, 3435 iter_mod->tracepoints_ptrs,
3436 iter_mod->tracepoints 3436 iter_mod->tracepoints_ptrs
3437 + iter_mod->num_tracepoints); 3437 + iter_mod->num_tracepoints);
3438 if (found) { 3438 if (found) {
3439 iter->module = iter_mod; 3439 iter->module = iter_mod;
diff --git a/kernel/panic.c b/kernel/panic.c
index 4c13b1a88ebb..991bb87a1704 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -34,6 +34,7 @@ static int pause_on_oops_flag;
34static DEFINE_SPINLOCK(pause_on_oops_lock); 34static DEFINE_SPINLOCK(pause_on_oops_lock);
35 35
36int panic_timeout; 36int panic_timeout;
37EXPORT_SYMBOL_GPL(panic_timeout);
37 38
38ATOMIC_NOTIFIER_HEAD(panic_notifier_list); 39ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
39 40
diff --git a/kernel/params.c b/kernel/params.c
index 08107d181758..0da1411222b9 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -719,9 +719,7 @@ void destroy_params(const struct kernel_param *params, unsigned num)
719 params[i].ops->free(params[i].arg); 719 params[i].ops->free(params[i].arg);
720} 720}
721 721
722static void __init kernel_add_sysfs_param(const char *name, 722static struct module_kobject * __init locate_module_kobject(const char *name)
723 struct kernel_param *kparam,
724 unsigned int name_skip)
725{ 723{
726 struct module_kobject *mk; 724 struct module_kobject *mk;
727 struct kobject *kobj; 725 struct kobject *kobj;
@@ -729,10 +727,7 @@ static void __init kernel_add_sysfs_param(const char *name,
729 727
730 kobj = kset_find_obj(module_kset, name); 728 kobj = kset_find_obj(module_kset, name);
731 if (kobj) { 729 if (kobj) {
732 /* We already have one. Remove params so we can add more. */
733 mk = to_module_kobject(kobj); 730 mk = to_module_kobject(kobj);
734 /* We need to remove it before adding parameters. */
735 sysfs_remove_group(&mk->kobj, &mk->mp->grp);
736 } else { 731 } else {
737 mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); 732 mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL);
738 BUG_ON(!mk); 733 BUG_ON(!mk);
@@ -743,15 +738,36 @@ static void __init kernel_add_sysfs_param(const char *name,
743 "%s", name); 738 "%s", name);
744 if (err) { 739 if (err) {
745 kobject_put(&mk->kobj); 740 kobject_put(&mk->kobj);
746 printk(KERN_ERR "Module '%s' failed add to sysfs, " 741 printk(KERN_ERR
747 "error number %d\n", name, err); 742 "Module '%s' failed add to sysfs, error number %d\n",
748 printk(KERN_ERR "The system will be unstable now.\n"); 743 name, err);
749 return; 744 printk(KERN_ERR
745 "The system will be unstable now.\n");
746 return NULL;
750 } 747 }
751 /* So that exit path is even. */ 748
749 /* So that we hold reference in both cases. */
752 kobject_get(&mk->kobj); 750 kobject_get(&mk->kobj);
753 } 751 }
754 752
753 return mk;
754}
755
756static void __init kernel_add_sysfs_param(const char *name,
757 struct kernel_param *kparam,
758 unsigned int name_skip)
759{
760 struct module_kobject *mk;
761 int err;
762
763 mk = locate_module_kobject(name);
764 if (!mk)
765 return;
766
767 /* We need to remove old parameters before adding more. */
768 if (mk->mp)
769 sysfs_remove_group(&mk->kobj, &mk->mp->grp);
770
755 /* These should not fail at boot. */ 771 /* These should not fail at boot. */
756 err = add_sysfs_param(mk, kparam, kparam->name + name_skip); 772 err = add_sysfs_param(mk, kparam, kparam->name + name_skip);
757 BUG_ON(err); 773 BUG_ON(err);
@@ -796,6 +812,32 @@ static void __init param_sysfs_builtin(void)
796 } 812 }
797} 813}
798 814
815ssize_t __modver_version_show(struct module_attribute *mattr,
816 struct module *mod, char *buf)
817{
818 struct module_version_attribute *vattr =
819 container_of(mattr, struct module_version_attribute, mattr);
820
821 return sprintf(buf, "%s\n", vattr->version);
822}
823
824extern struct module_version_attribute __start___modver[], __stop___modver[];
825
826static void __init version_sysfs_builtin(void)
827{
828 const struct module_version_attribute *vattr;
829 struct module_kobject *mk;
830 int err;
831
832 for (vattr = __start___modver; vattr < __stop___modver; vattr++) {
833 mk = locate_module_kobject(vattr->module_name);
834 if (mk) {
835 err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr);
836 kobject_uevent(&mk->kobj, KOBJ_ADD);
837 kobject_put(&mk->kobj);
838 }
839 }
840}
799 841
800/* module-related sysfs stuff */ 842/* module-related sysfs stuff */
801 843
@@ -875,6 +917,7 @@ static int __init param_sysfs_init(void)
875 } 917 }
876 module_sysfs_initialized = 1; 918 module_sysfs_initialized = 1;
877 919
920 version_sysfs_builtin();
878 param_sysfs_builtin(); 921 param_sysfs_builtin();
879 922
880 return 0; 923 return 0;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 05ebe841270b..999835b6112b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1901,11 +1901,12 @@ static void __perf_event_read(void *info)
1901 return; 1901 return;
1902 1902
1903 raw_spin_lock(&ctx->lock); 1903 raw_spin_lock(&ctx->lock);
1904 update_context_time(ctx); 1904 if (ctx->is_active)
1905 update_context_time(ctx);
1905 update_event_times(event); 1906 update_event_times(event);
1907 if (event->state == PERF_EVENT_STATE_ACTIVE)
1908 event->pmu->read(event);
1906 raw_spin_unlock(&ctx->lock); 1909 raw_spin_unlock(&ctx->lock);
1907
1908 event->pmu->read(event);
1909} 1910}
1910 1911
1911static inline u64 perf_event_count(struct perf_event *event) 1912static inline u64 perf_event_count(struct perf_event *event)
@@ -1999,8 +2000,7 @@ static int alloc_callchain_buffers(void)
1999 * accessed from NMI. Use a temporary manual per cpu allocation 2000 * accessed from NMI. Use a temporary manual per cpu allocation
2000 * until that gets sorted out. 2001 * until that gets sorted out.
2001 */ 2002 */
2002 size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * 2003 size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);
2003 num_possible_cpus();
2004 2004
2005 entries = kzalloc(size, GFP_KERNEL); 2005 entries = kzalloc(size, GFP_KERNEL);
2006 if (!entries) 2006 if (!entries)
@@ -2201,13 +2201,6 @@ find_lively_task_by_vpid(pid_t vpid)
2201 if (!task) 2201 if (!task)
2202 return ERR_PTR(-ESRCH); 2202 return ERR_PTR(-ESRCH);
2203 2203
2204 /*
2205 * Can't attach events to a dying task.
2206 */
2207 err = -ESRCH;
2208 if (task->flags & PF_EXITING)
2209 goto errout;
2210
2211 /* Reuse ptrace permission checks for now. */ 2204 /* Reuse ptrace permission checks for now. */
2212 err = -EACCES; 2205 err = -EACCES;
2213 if (!ptrace_may_access(task, PTRACE_MODE_READ)) 2206 if (!ptrace_may_access(task, PTRACE_MODE_READ))
@@ -2228,14 +2221,11 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
2228 unsigned long flags; 2221 unsigned long flags;
2229 int ctxn, err; 2222 int ctxn, err;
2230 2223
2231 if (!task && cpu != -1) { 2224 if (!task) {
2232 /* Must be root to operate on a CPU event: */ 2225 /* Must be root to operate on a CPU event: */
2233 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) 2226 if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
2234 return ERR_PTR(-EACCES); 2227 return ERR_PTR(-EACCES);
2235 2228
2236 if (cpu < 0 || cpu >= nr_cpumask_bits)
2237 return ERR_PTR(-EINVAL);
2238
2239 /* 2229 /*
2240 * We could be clever and allow to attach a event to an 2230 * We could be clever and allow to attach a event to an
2241 * offline CPU and activate it when the CPU comes up, but 2231 * offline CPU and activate it when the CPU comes up, but
@@ -2271,14 +2261,27 @@ retry:
2271 2261
2272 get_ctx(ctx); 2262 get_ctx(ctx);
2273 2263
2274 if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { 2264 err = 0;
2275 /* 2265 mutex_lock(&task->perf_event_mutex);
2276 * We raced with some other task; use 2266 /*
2277 * the context they set. 2267 * If it has already passed perf_event_exit_task().
2278 */ 2268 * we must see PF_EXITING, it takes this mutex too.
2269 */
2270 if (task->flags & PF_EXITING)
2271 err = -ESRCH;
2272 else if (task->perf_event_ctxp[ctxn])
2273 err = -EAGAIN;
2274 else
2275 rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
2276 mutex_unlock(&task->perf_event_mutex);
2277
2278 if (unlikely(err)) {
2279 put_task_struct(task); 2279 put_task_struct(task);
2280 kfree(ctx); 2280 kfree(ctx);
2281 goto retry; 2281
2282 if (err == -EAGAIN)
2283 goto retry;
2284 goto errout;
2282 } 2285 }
2283 } 2286 }
2284 2287
@@ -5377,6 +5380,8 @@ free_dev:
5377 goto out; 5380 goto out;
5378} 5381}
5379 5382
5383static struct lock_class_key cpuctx_mutex;
5384
5380int perf_pmu_register(struct pmu *pmu, char *name, int type) 5385int perf_pmu_register(struct pmu *pmu, char *name, int type)
5381{ 5386{
5382 int cpu, ret; 5387 int cpu, ret;
@@ -5425,6 +5430,7 @@ skip_type:
5425 5430
5426 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); 5431 cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
5427 __perf_event_init_context(&cpuctx->ctx); 5432 __perf_event_init_context(&cpuctx->ctx);
5433 lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
5428 cpuctx->ctx.type = cpu_context; 5434 cpuctx->ctx.type = cpu_context;
5429 cpuctx->ctx.pmu = pmu; 5435 cpuctx->ctx.pmu = pmu;
5430 cpuctx->jiffies_interval = 1; 5436 cpuctx->jiffies_interval = 1;
@@ -5541,6 +5547,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
5541 struct hw_perf_event *hwc; 5547 struct hw_perf_event *hwc;
5542 long err; 5548 long err;
5543 5549
5550 if ((unsigned)cpu >= nr_cpu_ids) {
5551 if (!task || cpu != -1)
5552 return ERR_PTR(-EINVAL);
5553 }
5554
5544 event = kzalloc(sizeof(*event), GFP_KERNEL); 5555 event = kzalloc(sizeof(*event), GFP_KERNEL);
5545 if (!event) 5556 if (!event)
5546 return ERR_PTR(-ENOMEM); 5557 return ERR_PTR(-ENOMEM);
@@ -5589,7 +5600,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
5589 5600
5590 if (!overflow_handler && parent_event) 5601 if (!overflow_handler && parent_event)
5591 overflow_handler = parent_event->overflow_handler; 5602 overflow_handler = parent_event->overflow_handler;
5592 5603
5593 event->overflow_handler = overflow_handler; 5604 event->overflow_handler = overflow_handler;
5594 5605
5595 if (attr->disabled) 5606 if (attr->disabled)
@@ -6125,7 +6136,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
6125 * scheduled, so we are now safe from rescheduling changing 6136 * scheduled, so we are now safe from rescheduling changing
6126 * our context. 6137 * our context.
6127 */ 6138 */
6128 child_ctx = child->perf_event_ctxp[ctxn]; 6139 child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]);
6129 task_ctx_sched_out(child_ctx, EVENT_ALL); 6140 task_ctx_sched_out(child_ctx, EVENT_ALL);
6130 6141
6131 /* 6142 /*
@@ -6438,11 +6449,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6438 unsigned long flags; 6449 unsigned long flags;
6439 int ret = 0; 6450 int ret = 0;
6440 6451
6441 child->perf_event_ctxp[ctxn] = NULL;
6442
6443 mutex_init(&child->perf_event_mutex);
6444 INIT_LIST_HEAD(&child->perf_event_list);
6445
6446 if (likely(!parent->perf_event_ctxp[ctxn])) 6452 if (likely(!parent->perf_event_ctxp[ctxn]))
6447 return 0; 6453 return 0;
6448 6454
@@ -6494,7 +6500,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6494 6500
6495 raw_spin_lock_irqsave(&parent_ctx->lock, flags); 6501 raw_spin_lock_irqsave(&parent_ctx->lock, flags);
6496 parent_ctx->rotate_disable = 0; 6502 parent_ctx->rotate_disable = 0;
6497 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
6498 6503
6499 child_ctx = child->perf_event_ctxp[ctxn]; 6504 child_ctx = child->perf_event_ctxp[ctxn];
6500 6505
@@ -6502,12 +6507,11 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6502 /* 6507 /*
6503 * Mark the child context as a clone of the parent 6508 * Mark the child context as a clone of the parent
6504 * context, or of whatever the parent is a clone of. 6509 * context, or of whatever the parent is a clone of.
6505 * Note that if the parent is a clone, it could get 6510 *
6506 * uncloned at any point, but that doesn't matter 6511 * Note that if the parent is a clone, the holding of
6507 * because the list of events and the generation 6512 * parent_ctx->lock avoids it from being uncloned.
6508 * count can't have changed since we took the mutex.
6509 */ 6513 */
6510 cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); 6514 cloned_ctx = parent_ctx->parent_ctx;
6511 if (cloned_ctx) { 6515 if (cloned_ctx) {
6512 child_ctx->parent_ctx = cloned_ctx; 6516 child_ctx->parent_ctx = cloned_ctx;
6513 child_ctx->parent_gen = parent_ctx->parent_gen; 6517 child_ctx->parent_gen = parent_ctx->parent_gen;
@@ -6518,6 +6522,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
6518 get_ctx(child_ctx->parent_ctx); 6522 get_ctx(child_ctx->parent_ctx);
6519 } 6523 }
6520 6524
6525 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
6521 mutex_unlock(&parent_ctx->mutex); 6526 mutex_unlock(&parent_ctx->mutex);
6522 6527
6523 perf_unpin_context(parent_ctx); 6528 perf_unpin_context(parent_ctx);
@@ -6532,6 +6537,10 @@ int perf_event_init_task(struct task_struct *child)
6532{ 6537{
6533 int ctxn, ret; 6538 int ctxn, ret;
6534 6539
6540 memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp));
6541 mutex_init(&child->perf_event_mutex);
6542 INIT_LIST_HEAD(&child->perf_event_list);
6543
6535 for_each_task_context_nr(ctxn) { 6544 for_each_task_context_nr(ctxn) {
6536 ret = perf_event_init_context(child, ctxn); 6545 ret = perf_event_init_context(child, ctxn);
6537 if (ret) 6546 if (ret)
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index a5aff3ebad38..265729966ece 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -100,13 +100,9 @@ config PM_SLEEP_ADVANCED_DEBUG
100 depends on PM_ADVANCED_DEBUG 100 depends on PM_ADVANCED_DEBUG
101 default n 101 default n
102 102
103config SUSPEND_NVS
104 bool
105
106config SUSPEND 103config SUSPEND
107 bool "Suspend to RAM and standby" 104 bool "Suspend to RAM and standby"
108 depends on PM && ARCH_SUSPEND_POSSIBLE 105 depends on PM && ARCH_SUSPEND_POSSIBLE
109 select SUSPEND_NVS if HAS_IOMEM
110 default y 106 default y
111 ---help--- 107 ---help---
112 Allow the system to enter sleep states in which main memory is 108 Allow the system to enter sleep states in which main memory is
@@ -140,7 +136,6 @@ config HIBERNATION
140 depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE 136 depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
141 select LZO_COMPRESS 137 select LZO_COMPRESS
142 select LZO_DECOMPRESS 138 select LZO_DECOMPRESS
143 select SUSPEND_NVS if HAS_IOMEM
144 ---help--- 139 ---help---
145 Enable the suspend to disk (STD) functionality, which is usually 140 Enable the suspend to disk (STD) functionality, which is usually
146 called "hibernation" in user interfaces. STD checkpoints the 141 called "hibernation" in user interfaces. STD checkpoints the
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index b75597235d85..c350e18b53e3 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -7,6 +7,5 @@ obj-$(CONFIG_SUSPEND) += suspend.o
7obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o 7obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o
8obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ 8obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \
9 block_io.o 9 block_io.o
10obj-$(CONFIG_SUSPEND_NVS) += nvs.o
11 10
12obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o 11obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
diff --git a/kernel/power/nvs.c b/kernel/power/nvs.c
deleted file mode 100644
index 1836db60bbb6..000000000000
--- a/kernel/power/nvs.c
+++ /dev/null
@@ -1,136 +0,0 @@
1/*
2 * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory
3 *
4 * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
5 *
6 * This file is released under the GPLv2.
7 */
8
9#include <linux/io.h>
10#include <linux/kernel.h>
11#include <linux/list.h>
12#include <linux/mm.h>
13#include <linux/slab.h>
14#include <linux/suspend.h>
15
16/*
17 * Platforms, like ACPI, may want us to save some memory used by them during
18 * suspend and to restore the contents of this memory during the subsequent
19 * resume. The code below implements a mechanism allowing us to do that.
20 */
21
22struct nvs_page {
23 unsigned long phys_start;
24 unsigned int size;
25 void *kaddr;
26 void *data;
27 struct list_head node;
28};
29
30static LIST_HEAD(nvs_list);
31
32/**
33 * suspend_nvs_register - register platform NVS memory region to save
34 * @start - physical address of the region
35 * @size - size of the region
36 *
37 * The NVS region need not be page-aligned (both ends) and we arrange
38 * things so that the data from page-aligned addresses in this region will
39 * be copied into separate RAM pages.
40 */
41int suspend_nvs_register(unsigned long start, unsigned long size)
42{
43 struct nvs_page *entry, *next;
44
45 while (size > 0) {
46 unsigned int nr_bytes;
47
48 entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL);
49 if (!entry)
50 goto Error;
51
52 list_add_tail(&entry->node, &nvs_list);
53 entry->phys_start = start;
54 nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK);
55 entry->size = (size < nr_bytes) ? size : nr_bytes;
56
57 start += entry->size;
58 size -= entry->size;
59 }
60 return 0;
61
62 Error:
63 list_for_each_entry_safe(entry, next, &nvs_list, node) {
64 list_del(&entry->node);
65 kfree(entry);
66 }
67 return -ENOMEM;
68}
69
70/**
71 * suspend_nvs_free - free data pages allocated for saving NVS regions
72 */
73void suspend_nvs_free(void)
74{
75 struct nvs_page *entry;
76
77 list_for_each_entry(entry, &nvs_list, node)
78 if (entry->data) {
79 free_page((unsigned long)entry->data);
80 entry->data = NULL;
81 if (entry->kaddr) {
82 iounmap(entry->kaddr);
83 entry->kaddr = NULL;
84 }
85 }
86}
87
88/**
89 * suspend_nvs_alloc - allocate memory necessary for saving NVS regions
90 */
91int suspend_nvs_alloc(void)
92{
93 struct nvs_page *entry;
94
95 list_for_each_entry(entry, &nvs_list, node) {
96 entry->data = (void *)__get_free_page(GFP_KERNEL);
97 if (!entry->data) {
98 suspend_nvs_free();
99 return -ENOMEM;
100 }
101 }
102 return 0;
103}
104
105/**
106 * suspend_nvs_save - save NVS memory regions
107 */
108void suspend_nvs_save(void)
109{
110 struct nvs_page *entry;
111
112 printk(KERN_INFO "PM: Saving platform NVS memory\n");
113
114 list_for_each_entry(entry, &nvs_list, node)
115 if (entry->data) {
116 entry->kaddr = ioremap(entry->phys_start, entry->size);
117 memcpy(entry->data, entry->kaddr, entry->size);
118 }
119}
120
121/**
122 * suspend_nvs_restore - restore NVS memory regions
123 *
124 * This function is going to be called with interrupts disabled, so it
125 * cannot iounmap the virtual addresses used to access the NVS region.
126 */
127void suspend_nvs_restore(void)
128{
129 struct nvs_page *entry;
130
131 printk(KERN_INFO "PM: Restoring platform NVS memory\n");
132
133 list_for_each_entry(entry, &nvs_list, node)
134 if (entry->data)
135 memcpy(entry->kaddr, entry->data, entry->size);
136}
diff --git a/kernel/printk.c b/kernel/printk.c
index 53d9a9ec88e6..36231525e22f 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -97,7 +97,7 @@ static int console_locked, console_suspended;
97/* 97/*
98 * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars 98 * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars
99 * It is also used in interesting ways to provide interlocking in 99 * It is also used in interesting ways to provide interlocking in
100 * release_console_sem(). 100 * console_unlock();.
101 */ 101 */
102static DEFINE_SPINLOCK(logbuf_lock); 102static DEFINE_SPINLOCK(logbuf_lock);
103 103
@@ -262,25 +262,47 @@ int dmesg_restrict = 1;
262int dmesg_restrict; 262int dmesg_restrict;
263#endif 263#endif
264 264
265static int syslog_action_restricted(int type)
266{
267 if (dmesg_restrict)
268 return 1;
269 /* Unless restricted, we allow "read all" and "get buffer size" for everybody */
270 return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER;
271}
272
273static int check_syslog_permissions(int type, bool from_file)
274{
275 /*
276 * If this is from /proc/kmsg and we've already opened it, then we've
277 * already done the capabilities checks at open time.
278 */
279 if (from_file && type != SYSLOG_ACTION_OPEN)
280 return 0;
281
282 if (syslog_action_restricted(type)) {
283 if (capable(CAP_SYSLOG))
284 return 0;
285 /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */
286 if (capable(CAP_SYS_ADMIN)) {
287 WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN "
288 "but no CAP_SYSLOG (deprecated).\n");
289 return 0;
290 }
291 return -EPERM;
292 }
293 return 0;
294}
295
265int do_syslog(int type, char __user *buf, int len, bool from_file) 296int do_syslog(int type, char __user *buf, int len, bool from_file)
266{ 297{
267 unsigned i, j, limit, count; 298 unsigned i, j, limit, count;
268 int do_clear = 0; 299 int do_clear = 0;
269 char c; 300 char c;
270 int error = 0; 301 int error;
271 302
272 /* 303 error = check_syslog_permissions(type, from_file);
273 * If this is from /proc/kmsg we only do the capabilities checks 304 if (error)
274 * at open time. 305 goto out;
275 */
276 if (type == SYSLOG_ACTION_OPEN || !from_file) {
277 if (dmesg_restrict && !capable(CAP_SYSLOG))
278 goto warn; /* switch to return -EPERM after 2.6.39 */
279 if ((type != SYSLOG_ACTION_READ_ALL &&
280 type != SYSLOG_ACTION_SIZE_BUFFER) &&
281 !capable(CAP_SYSLOG))
282 goto warn; /* switch to return -EPERM after 2.6.39 */
283 }
284 306
285 error = security_syslog(type); 307 error = security_syslog(type);
286 if (error) 308 if (error)
@@ -423,12 +445,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
423 } 445 }
424out: 446out:
425 return error; 447 return error;
426warn:
427 /* remove after 2.6.39 */
428 if (capable(CAP_SYS_ADMIN))
429 WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN "
430 "but no CAP_SYSLOG (deprecated and denied).\n");
431 return -EPERM;
432} 448}
433 449
434SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) 450SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
@@ -501,7 +517,7 @@ static void _call_console_drivers(unsigned start,
501/* 517/*
502 * Call the console drivers, asking them to write out 518 * Call the console drivers, asking them to write out
503 * log_buf[start] to log_buf[end - 1]. 519 * log_buf[start] to log_buf[end - 1].
504 * The console_sem must be held. 520 * The console_lock must be held.
505 */ 521 */
506static void call_console_drivers(unsigned start, unsigned end) 522static void call_console_drivers(unsigned start, unsigned end)
507{ 523{
@@ -604,11 +620,11 @@ static int have_callable_console(void)
604 * 620 *
605 * This is printk(). It can be called from any context. We want it to work. 621 * This is printk(). It can be called from any context. We want it to work.
606 * 622 *
607 * We try to grab the console_sem. If we succeed, it's easy - we log the output and 623 * We try to grab the console_lock. If we succeed, it's easy - we log the output and
608 * call the console drivers. If we fail to get the semaphore we place the output 624 * call the console drivers. If we fail to get the semaphore we place the output
609 * into the log buffer and return. The current holder of the console_sem will 625 * into the log buffer and return. The current holder of the console_sem will
610 * notice the new output in release_console_sem() and will send it to the 626 * notice the new output in console_unlock(); and will send it to the
611 * consoles before releasing the semaphore. 627 * consoles before releasing the lock.
612 * 628 *
613 * One effect of this deferred printing is that code which calls printk() and 629 * One effect of this deferred printing is that code which calls printk() and
614 * then changes console_loglevel may break. This is because console_loglevel 630 * then changes console_loglevel may break. This is because console_loglevel
@@ -659,19 +675,19 @@ static inline int can_use_console(unsigned int cpu)
659/* 675/*
660 * Try to get console ownership to actually show the kernel 676 * Try to get console ownership to actually show the kernel
661 * messages from a 'printk'. Return true (and with the 677 * messages from a 'printk'. Return true (and with the
662 * console_semaphore held, and 'console_locked' set) if it 678 * console_lock held, and 'console_locked' set) if it
663 * is successful, false otherwise. 679 * is successful, false otherwise.
664 * 680 *
665 * This gets called with the 'logbuf_lock' spinlock held and 681 * This gets called with the 'logbuf_lock' spinlock held and
666 * interrupts disabled. It should return with 'lockbuf_lock' 682 * interrupts disabled. It should return with 'lockbuf_lock'
667 * released but interrupts still disabled. 683 * released but interrupts still disabled.
668 */ 684 */
669static int acquire_console_semaphore_for_printk(unsigned int cpu) 685static int console_trylock_for_printk(unsigned int cpu)
670 __releases(&logbuf_lock) 686 __releases(&logbuf_lock)
671{ 687{
672 int retval = 0; 688 int retval = 0;
673 689
674 if (!try_acquire_console_sem()) { 690 if (console_trylock()) {
675 retval = 1; 691 retval = 1;
676 692
677 /* 693 /*
@@ -827,12 +843,12 @@ asmlinkage int vprintk(const char *fmt, va_list args)
827 * actual magic (print out buffers, wake up klogd, 843 * actual magic (print out buffers, wake up klogd,
828 * etc). 844 * etc).
829 * 845 *
830 * The acquire_console_semaphore_for_printk() function 846 * The console_trylock_for_printk() function
831 * will release 'logbuf_lock' regardless of whether it 847 * will release 'logbuf_lock' regardless of whether it
832 * actually gets the semaphore or not. 848 * actually gets the semaphore or not.
833 */ 849 */
834 if (acquire_console_semaphore_for_printk(this_cpu)) 850 if (console_trylock_for_printk(this_cpu))
835 release_console_sem(); 851 console_unlock();
836 852
837 lockdep_on(); 853 lockdep_on();
838out_restore_irqs: 854out_restore_irqs:
@@ -993,7 +1009,7 @@ void suspend_console(void)
993 if (!console_suspend_enabled) 1009 if (!console_suspend_enabled)
994 return; 1010 return;
995 printk("Suspending console(s) (use no_console_suspend to debug)\n"); 1011 printk("Suspending console(s) (use no_console_suspend to debug)\n");
996 acquire_console_sem(); 1012 console_lock();
997 console_suspended = 1; 1013 console_suspended = 1;
998 up(&console_sem); 1014 up(&console_sem);
999} 1015}
@@ -1004,7 +1020,7 @@ void resume_console(void)
1004 return; 1020 return;
1005 down(&console_sem); 1021 down(&console_sem);
1006 console_suspended = 0; 1022 console_suspended = 0;
1007 release_console_sem(); 1023 console_unlock();
1008} 1024}
1009 1025
1010/** 1026/**
@@ -1027,21 +1043,21 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self,
1027 case CPU_DYING: 1043 case CPU_DYING:
1028 case CPU_DOWN_FAILED: 1044 case CPU_DOWN_FAILED:
1029 case CPU_UP_CANCELED: 1045 case CPU_UP_CANCELED:
1030 acquire_console_sem(); 1046 console_lock();
1031 release_console_sem(); 1047 console_unlock();
1032 } 1048 }
1033 return NOTIFY_OK; 1049 return NOTIFY_OK;
1034} 1050}
1035 1051
1036/** 1052/**
1037 * acquire_console_sem - lock the console system for exclusive use. 1053 * console_lock - lock the console system for exclusive use.
1038 * 1054 *
1039 * Acquires a semaphore which guarantees that the caller has 1055 * Acquires a lock which guarantees that the caller has
1040 * exclusive access to the console system and the console_drivers list. 1056 * exclusive access to the console system and the console_drivers list.
1041 * 1057 *
1042 * Can sleep, returns nothing. 1058 * Can sleep, returns nothing.
1043 */ 1059 */
1044void acquire_console_sem(void) 1060void console_lock(void)
1045{ 1061{
1046 BUG_ON(in_interrupt()); 1062 BUG_ON(in_interrupt());
1047 down(&console_sem); 1063 down(&console_sem);
@@ -1050,21 +1066,29 @@ void acquire_console_sem(void)
1050 console_locked = 1; 1066 console_locked = 1;
1051 console_may_schedule = 1; 1067 console_may_schedule = 1;
1052} 1068}
1053EXPORT_SYMBOL(acquire_console_sem); 1069EXPORT_SYMBOL(console_lock);
1054 1070
1055int try_acquire_console_sem(void) 1071/**
1072 * console_trylock - try to lock the console system for exclusive use.
1073 *
1074 * Tried to acquire a lock which guarantees that the caller has
1075 * exclusive access to the console system and the console_drivers list.
1076 *
1077 * returns 1 on success, and 0 on failure to acquire the lock.
1078 */
1079int console_trylock(void)
1056{ 1080{
1057 if (down_trylock(&console_sem)) 1081 if (down_trylock(&console_sem))
1058 return -1; 1082 return 0;
1059 if (console_suspended) { 1083 if (console_suspended) {
1060 up(&console_sem); 1084 up(&console_sem);
1061 return -1; 1085 return 0;
1062 } 1086 }
1063 console_locked = 1; 1087 console_locked = 1;
1064 console_may_schedule = 0; 1088 console_may_schedule = 0;
1065 return 0; 1089 return 1;
1066} 1090}
1067EXPORT_SYMBOL(try_acquire_console_sem); 1091EXPORT_SYMBOL(console_trylock);
1068 1092
1069int is_console_locked(void) 1093int is_console_locked(void)
1070{ 1094{
@@ -1095,20 +1119,20 @@ void wake_up_klogd(void)
1095} 1119}
1096 1120
1097/** 1121/**
1098 * release_console_sem - unlock the console system 1122 * console_unlock - unlock the console system
1099 * 1123 *
1100 * Releases the semaphore which the caller holds on the console system 1124 * Releases the console_lock which the caller holds on the console system
1101 * and the console driver list. 1125 * and the console driver list.
1102 * 1126 *
1103 * While the semaphore was held, console output may have been buffered 1127 * While the console_lock was held, console output may have been buffered
1104 * by printk(). If this is the case, release_console_sem() emits 1128 * by printk(). If this is the case, console_unlock(); emits
1105 * the output prior to releasing the semaphore. 1129 * the output prior to releasing the lock.
1106 * 1130 *
1107 * If there is output waiting for klogd, we wake it up. 1131 * If there is output waiting for klogd, we wake it up.
1108 * 1132 *
1109 * release_console_sem() may be called from any context. 1133 * console_unlock(); may be called from any context.
1110 */ 1134 */
1111void release_console_sem(void) 1135void console_unlock(void)
1112{ 1136{
1113 unsigned long flags; 1137 unsigned long flags;
1114 unsigned _con_start, _log_end; 1138 unsigned _con_start, _log_end;
@@ -1141,7 +1165,7 @@ void release_console_sem(void)
1141 if (wake_klogd) 1165 if (wake_klogd)
1142 wake_up_klogd(); 1166 wake_up_klogd();
1143} 1167}
1144EXPORT_SYMBOL(release_console_sem); 1168EXPORT_SYMBOL(console_unlock);
1145 1169
1146/** 1170/**
1147 * console_conditional_schedule - yield the CPU if required 1171 * console_conditional_schedule - yield the CPU if required
@@ -1150,7 +1174,7 @@ EXPORT_SYMBOL(release_console_sem);
1150 * if this CPU should yield the CPU to another task, do 1174 * if this CPU should yield the CPU to another task, do
1151 * so here. 1175 * so here.
1152 * 1176 *
1153 * Must be called within acquire_console_sem(). 1177 * Must be called within console_lock();.
1154 */ 1178 */
1155void __sched console_conditional_schedule(void) 1179void __sched console_conditional_schedule(void)
1156{ 1180{
@@ -1171,14 +1195,14 @@ void console_unblank(void)
1171 if (down_trylock(&console_sem) != 0) 1195 if (down_trylock(&console_sem) != 0)
1172 return; 1196 return;
1173 } else 1197 } else
1174 acquire_console_sem(); 1198 console_lock();
1175 1199
1176 console_locked = 1; 1200 console_locked = 1;
1177 console_may_schedule = 0; 1201 console_may_schedule = 0;
1178 for_each_console(c) 1202 for_each_console(c)
1179 if ((c->flags & CON_ENABLED) && c->unblank) 1203 if ((c->flags & CON_ENABLED) && c->unblank)
1180 c->unblank(); 1204 c->unblank();
1181 release_console_sem(); 1205 console_unlock();
1182} 1206}
1183 1207
1184/* 1208/*
@@ -1189,7 +1213,7 @@ struct tty_driver *console_device(int *index)
1189 struct console *c; 1213 struct console *c;
1190 struct tty_driver *driver = NULL; 1214 struct tty_driver *driver = NULL;
1191 1215
1192 acquire_console_sem(); 1216 console_lock();
1193 for_each_console(c) { 1217 for_each_console(c) {
1194 if (!c->device) 1218 if (!c->device)
1195 continue; 1219 continue;
@@ -1197,7 +1221,7 @@ struct tty_driver *console_device(int *index)
1197 if (driver) 1221 if (driver)
1198 break; 1222 break;
1199 } 1223 }
1200 release_console_sem(); 1224 console_unlock();
1201 return driver; 1225 return driver;
1202} 1226}
1203 1227
@@ -1208,17 +1232,17 @@ struct tty_driver *console_device(int *index)
1208 */ 1232 */
1209void console_stop(struct console *console) 1233void console_stop(struct console *console)
1210{ 1234{
1211 acquire_console_sem(); 1235 console_lock();
1212 console->flags &= ~CON_ENABLED; 1236 console->flags &= ~CON_ENABLED;
1213 release_console_sem(); 1237 console_unlock();
1214} 1238}
1215EXPORT_SYMBOL(console_stop); 1239EXPORT_SYMBOL(console_stop);
1216 1240
1217void console_start(struct console *console) 1241void console_start(struct console *console)
1218{ 1242{
1219 acquire_console_sem(); 1243 console_lock();
1220 console->flags |= CON_ENABLED; 1244 console->flags |= CON_ENABLED;
1221 release_console_sem(); 1245 console_unlock();
1222} 1246}
1223EXPORT_SYMBOL(console_start); 1247EXPORT_SYMBOL(console_start);
1224 1248
@@ -1340,7 +1364,7 @@ void register_console(struct console *newcon)
1340 * Put this console in the list - keep the 1364 * Put this console in the list - keep the
1341 * preferred driver at the head of the list. 1365 * preferred driver at the head of the list.
1342 */ 1366 */
1343 acquire_console_sem(); 1367 console_lock();
1344 if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) { 1368 if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) {
1345 newcon->next = console_drivers; 1369 newcon->next = console_drivers;
1346 console_drivers = newcon; 1370 console_drivers = newcon;
@@ -1352,14 +1376,14 @@ void register_console(struct console *newcon)
1352 } 1376 }
1353 if (newcon->flags & CON_PRINTBUFFER) { 1377 if (newcon->flags & CON_PRINTBUFFER) {
1354 /* 1378 /*
1355 * release_console_sem() will print out the buffered messages 1379 * console_unlock(); will print out the buffered messages
1356 * for us. 1380 * for us.
1357 */ 1381 */
1358 spin_lock_irqsave(&logbuf_lock, flags); 1382 spin_lock_irqsave(&logbuf_lock, flags);
1359 con_start = log_start; 1383 con_start = log_start;
1360 spin_unlock_irqrestore(&logbuf_lock, flags); 1384 spin_unlock_irqrestore(&logbuf_lock, flags);
1361 } 1385 }
1362 release_console_sem(); 1386 console_unlock();
1363 console_sysfs_notify(); 1387 console_sysfs_notify();
1364 1388
1365 /* 1389 /*
@@ -1396,7 +1420,7 @@ int unregister_console(struct console *console)
1396 return braille_unregister_console(console); 1420 return braille_unregister_console(console);
1397#endif 1421#endif
1398 1422
1399 acquire_console_sem(); 1423 console_lock();
1400 if (console_drivers == console) { 1424 if (console_drivers == console) {
1401 console_drivers=console->next; 1425 console_drivers=console->next;
1402 res = 0; 1426 res = 0;
@@ -1418,7 +1442,7 @@ int unregister_console(struct console *console)
1418 if (console_drivers != NULL && console->flags & CON_CONSDEV) 1442 if (console_drivers != NULL && console->flags & CON_CONSDEV)
1419 console_drivers->flags |= CON_CONSDEV; 1443 console_drivers->flags |= CON_CONSDEV;
1420 1444
1421 release_console_sem(); 1445 console_unlock();
1422 console_sysfs_notify(); 1446 console_sysfs_notify();
1423 return res; 1447 return res;
1424} 1448}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 99bbaa3e5b0d..1708b1e2972d 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -313,7 +313,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
313 child->exit_code = data; 313 child->exit_code = data;
314 dead = __ptrace_detach(current, child); 314 dead = __ptrace_detach(current, child);
315 if (!child->exit_state) 315 if (!child->exit_state)
316 wake_up_process(child); 316 wake_up_state(child, TASK_TRACED | TASK_STOPPED);
317 } 317 }
318 write_unlock_irq(&tasklist_lock); 318 write_unlock_irq(&tasklist_lock);
319 319
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 034493724749..0c343b9a46d5 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -189,7 +189,8 @@ static int rcu_kthread(void *arg)
189 unsigned long flags; 189 unsigned long flags;
190 190
191 for (;;) { 191 for (;;) {
192 wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); 192 wait_event_interruptible(rcu_kthread_wq,
193 have_rcu_kthread_work != 0);
193 morework = rcu_boost(); 194 morework = rcu_boost();
194 local_irq_save(flags); 195 local_irq_save(flags);
195 work = have_rcu_kthread_work; 196 work = have_rcu_kthread_work;
diff --git a/kernel/sched.c b/kernel/sched.c
index ea3e5eff3878..18d38e4ec7ba 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -553,9 +553,6 @@ struct rq {
553 /* try_to_wake_up() stats */ 553 /* try_to_wake_up() stats */
554 unsigned int ttwu_count; 554 unsigned int ttwu_count;
555 unsigned int ttwu_local; 555 unsigned int ttwu_local;
556
557 /* BKL stats */
558 unsigned int bkl_count;
559#endif 556#endif
560}; 557};
561 558
@@ -609,6 +606,9 @@ static inline struct task_group *task_group(struct task_struct *p)
609 struct task_group *tg; 606 struct task_group *tg;
610 struct cgroup_subsys_state *css; 607 struct cgroup_subsys_state *css;
611 608
609 if (p->flags & PF_EXITING)
610 return &root_task_group;
611
612 css = task_subsys_state_check(p, cpu_cgroup_subsys_id, 612 css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
613 lockdep_is_held(&task_rq(p)->lock)); 613 lockdep_is_held(&task_rq(p)->lock));
614 tg = container_of(css, struct task_group, css); 614 tg = container_of(css, struct task_group, css);
@@ -3887,7 +3887,7 @@ static inline void schedule_debug(struct task_struct *prev)
3887 schedstat_inc(this_rq(), sched_count); 3887 schedstat_inc(this_rq(), sched_count);
3888#ifdef CONFIG_SCHEDSTATS 3888#ifdef CONFIG_SCHEDSTATS
3889 if (unlikely(prev->lock_depth >= 0)) { 3889 if (unlikely(prev->lock_depth >= 0)) {
3890 schedstat_inc(this_rq(), bkl_count); 3890 schedstat_inc(this_rq(), rq_sched_info.bkl_count);
3891 schedstat_inc(prev, sched_info.bkl_count); 3891 schedstat_inc(prev, sched_info.bkl_count);
3892 } 3892 }
3893#endif 3893#endif
@@ -4871,7 +4871,8 @@ recheck:
4871 * assigned. 4871 * assigned.
4872 */ 4872 */
4873 if (rt_bandwidth_enabled() && rt_policy(policy) && 4873 if (rt_bandwidth_enabled() && rt_policy(policy) &&
4874 task_group(p)->rt_bandwidth.rt_runtime == 0) { 4874 task_group(p)->rt_bandwidth.rt_runtime == 0 &&
4875 !task_group_is_autogroup(task_group(p))) {
4875 __task_rq_unlock(rq); 4876 __task_rq_unlock(rq);
4876 raw_spin_unlock_irqrestore(&p->pi_lock, flags); 4877 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
4877 return -EPERM; 4878 return -EPERM;
@@ -8882,6 +8883,20 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
8882 } 8883 }
8883} 8884}
8884 8885
8886static void
8887cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task)
8888{
8889 /*
8890 * cgroup_exit() is called in the copy_process() failure path.
8891 * Ignore this case since the task hasn't ran yet, this avoids
8892 * trying to poke a half freed task state from generic code.
8893 */
8894 if (!(task->flags & PF_EXITING))
8895 return;
8896
8897 sched_move_task(task);
8898}
8899
8885#ifdef CONFIG_FAIR_GROUP_SCHED 8900#ifdef CONFIG_FAIR_GROUP_SCHED
8886static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, 8901static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype,
8887 u64 shareval) 8902 u64 shareval)
@@ -8954,6 +8969,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
8954 .destroy = cpu_cgroup_destroy, 8969 .destroy = cpu_cgroup_destroy,
8955 .can_attach = cpu_cgroup_can_attach, 8970 .can_attach = cpu_cgroup_can_attach,
8956 .attach = cpu_cgroup_attach, 8971 .attach = cpu_cgroup_attach,
8972 .exit = cpu_cgroup_exit,
8957 .populate = cpu_cgroup_populate, 8973 .populate = cpu_cgroup_populate,
8958 .subsys_id = cpu_cgroup_subsys_id, 8974 .subsys_id = cpu_cgroup_subsys_id,
8959 .early_init = 1, 8975 .early_init = 1,
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c
index 32a723b8f84c..9fb656283157 100644
--- a/kernel/sched_autogroup.c
+++ b/kernel/sched_autogroup.c
@@ -27,6 +27,11 @@ static inline void autogroup_destroy(struct kref *kref)
27{ 27{
28 struct autogroup *ag = container_of(kref, struct autogroup, kref); 28 struct autogroup *ag = container_of(kref, struct autogroup, kref);
29 29
30#ifdef CONFIG_RT_GROUP_SCHED
31 /* We've redirected RT tasks to the root task group... */
32 ag->tg->rt_se = NULL;
33 ag->tg->rt_rq = NULL;
34#endif
30 sched_destroy_group(ag->tg); 35 sched_destroy_group(ag->tg);
31} 36}
32 37
@@ -55,6 +60,10 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p)
55 return ag; 60 return ag;
56} 61}
57 62
63#ifdef CONFIG_RT_GROUP_SCHED
64static void free_rt_sched_group(struct task_group *tg);
65#endif
66
58static inline struct autogroup *autogroup_create(void) 67static inline struct autogroup *autogroup_create(void)
59{ 68{
60 struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); 69 struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL);
@@ -72,6 +81,19 @@ static inline struct autogroup *autogroup_create(void)
72 init_rwsem(&ag->lock); 81 init_rwsem(&ag->lock);
73 ag->id = atomic_inc_return(&autogroup_seq_nr); 82 ag->id = atomic_inc_return(&autogroup_seq_nr);
74 ag->tg = tg; 83 ag->tg = tg;
84#ifdef CONFIG_RT_GROUP_SCHED
85 /*
86 * Autogroup RT tasks are redirected to the root task group
87 * so we don't have to move tasks around upon policy change,
88 * or flail around trying to allocate bandwidth on the fly.
89 * A bandwidth exception in __sched_setscheduler() allows
90 * the policy change to proceed. Thereafter, task_group()
91 * returns &root_task_group, so zero bandwidth is required.
92 */
93 free_rt_sched_group(tg);
94 tg->rt_se = root_task_group.rt_se;
95 tg->rt_rq = root_task_group.rt_rq;
96#endif
75 tg->autogroup = ag; 97 tg->autogroup = ag;
76 98
77 return ag; 99 return ag;
@@ -106,6 +128,11 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg)
106 return true; 128 return true;
107} 129}
108 130
131static inline bool task_group_is_autogroup(struct task_group *tg)
132{
133 return tg != &root_task_group && tg->autogroup;
134}
135
109static inline struct task_group * 136static inline struct task_group *
110autogroup_task_group(struct task_struct *p, struct task_group *tg) 137autogroup_task_group(struct task_struct *p, struct task_group *tg)
111{ 138{
@@ -231,6 +258,11 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m)
231#ifdef CONFIG_SCHED_DEBUG 258#ifdef CONFIG_SCHED_DEBUG
232static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) 259static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
233{ 260{
261 int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled);
262
263 if (!enabled || !tg->autogroup)
264 return 0;
265
234 return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); 266 return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
235} 267}
236#endif /* CONFIG_SCHED_DEBUG */ 268#endif /* CONFIG_SCHED_DEBUG */
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h
index 5358e241cb20..7b859ffe5dad 100644
--- a/kernel/sched_autogroup.h
+++ b/kernel/sched_autogroup.h
@@ -15,6 +15,10 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg);
15 15
16static inline void autogroup_init(struct task_struct *init_task) { } 16static inline void autogroup_init(struct task_struct *init_task) { }
17static inline void autogroup_free(struct task_group *tg) { } 17static inline void autogroup_free(struct task_group *tg) { }
18static inline bool task_group_is_autogroup(struct task_group *tg)
19{
20 return 0;
21}
18 22
19static inline struct task_group * 23static inline struct task_group *
20autogroup_task_group(struct task_struct *p, struct task_group *tg) 24autogroup_task_group(struct task_struct *p, struct task_group *tg)
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 1dfae3d014b5..eb6cb8edd075 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -16,6 +16,8 @@
16#include <linux/kallsyms.h> 16#include <linux/kallsyms.h>
17#include <linux/utsname.h> 17#include <linux/utsname.h>
18 18
19static DEFINE_SPINLOCK(sched_debug_lock);
20
19/* 21/*
20 * This allows printing both to /proc/sched_debug and 22 * This allows printing both to /proc/sched_debug and
21 * to the console 23 * to the console
@@ -86,6 +88,26 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
86} 88}
87#endif 89#endif
88 90
91#ifdef CONFIG_CGROUP_SCHED
92static char group_path[PATH_MAX];
93
94static char *task_group_path(struct task_group *tg)
95{
96 if (autogroup_path(tg, group_path, PATH_MAX))
97 return group_path;
98
99 /*
100 * May be NULL if the underlying cgroup isn't fully-created yet
101 */
102 if (!tg->css.cgroup) {
103 group_path[0] = '\0';
104 return group_path;
105 }
106 cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
107 return group_path;
108}
109#endif
110
89static void 111static void
90print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) 112print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
91{ 113{
@@ -108,6 +130,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
108 SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld", 130 SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
109 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); 131 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
110#endif 132#endif
133#ifdef CONFIG_CGROUP_SCHED
134 SEQ_printf(m, " %s", task_group_path(task_group(p)));
135#endif
111 136
112 SEQ_printf(m, "\n"); 137 SEQ_printf(m, "\n");
113} 138}
@@ -144,7 +169,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
144 struct sched_entity *last; 169 struct sched_entity *last;
145 unsigned long flags; 170 unsigned long flags;
146 171
172#ifdef CONFIG_FAIR_GROUP_SCHED
173 SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
174#else
147 SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); 175 SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
176#endif
148 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", 177 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock",
149 SPLIT_NS(cfs_rq->exec_clock)); 178 SPLIT_NS(cfs_rq->exec_clock));
150 179
@@ -191,7 +220,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
191 220
192void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) 221void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
193{ 222{
223#ifdef CONFIG_RT_GROUP_SCHED
224 SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
225#else
194 SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); 226 SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
227#endif
195 228
196#define P(x) \ 229#define P(x) \
197 SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) 230 SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
@@ -212,6 +245,7 @@ extern __read_mostly int sched_clock_running;
212static void print_cpu(struct seq_file *m, int cpu) 245static void print_cpu(struct seq_file *m, int cpu)
213{ 246{
214 struct rq *rq = cpu_rq(cpu); 247 struct rq *rq = cpu_rq(cpu);
248 unsigned long flags;
215 249
216#ifdef CONFIG_X86 250#ifdef CONFIG_X86
217 { 251 {
@@ -262,14 +296,20 @@ static void print_cpu(struct seq_file *m, int cpu)
262 P(ttwu_count); 296 P(ttwu_count);
263 P(ttwu_local); 297 P(ttwu_local);
264 298
265 P(bkl_count); 299 SEQ_printf(m, " .%-30s: %d\n", "bkl_count",
300 rq->rq_sched_info.bkl_count);
266 301
267#undef P 302#undef P
303#undef P64
268#endif 304#endif
305 spin_lock_irqsave(&sched_debug_lock, flags);
269 print_cfs_stats(m, cpu); 306 print_cfs_stats(m, cpu);
270 print_rt_stats(m, cpu); 307 print_rt_stats(m, cpu);
271 308
309 rcu_read_lock();
272 print_rq(m, rq, cpu); 310 print_rq(m, rq, cpu);
311 rcu_read_unlock();
312 spin_unlock_irqrestore(&sched_debug_lock, flags);
273} 313}
274 314
275static const char *sched_tunable_scaling_names[] = { 315static const char *sched_tunable_scaling_names[] = {
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c62ebae65cf0..0c26e2df450e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -699,7 +699,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
699 cfs_rq->nr_running--; 699 cfs_rq->nr_running--;
700} 700}
701 701
702#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED 702#ifdef CONFIG_FAIR_GROUP_SCHED
703# ifdef CONFIG_SMP
703static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, 704static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq,
704 int global_update) 705 int global_update)
705{ 706{
@@ -721,10 +722,10 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
721 u64 now, delta; 722 u64 now, delta;
722 unsigned long load = cfs_rq->load.weight; 723 unsigned long load = cfs_rq->load.weight;
723 724
724 if (!cfs_rq) 725 if (cfs_rq->tg == &root_task_group)
725 return; 726 return;
726 727
727 now = rq_of(cfs_rq)->clock; 728 now = rq_of(cfs_rq)->clock_task;
728 delta = now - cfs_rq->load_stamp; 729 delta = now - cfs_rq->load_stamp;
729 730
730 /* truncate load history at 4 idle periods */ 731 /* truncate load history at 4 idle periods */
@@ -762,6 +763,51 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
762 list_del_leaf_cfs_rq(cfs_rq); 763 list_del_leaf_cfs_rq(cfs_rq);
763} 764}
764 765
766static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
767 long weight_delta)
768{
769 long load_weight, load, shares;
770
771 load = cfs_rq->load.weight + weight_delta;
772
773 load_weight = atomic_read(&tg->load_weight);
774 load_weight -= cfs_rq->load_contribution;
775 load_weight += load;
776
777 shares = (tg->shares * load);
778 if (load_weight)
779 shares /= load_weight;
780
781 if (shares < MIN_SHARES)
782 shares = MIN_SHARES;
783 if (shares > tg->shares)
784 shares = tg->shares;
785
786 return shares;
787}
788
789static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
790{
791 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
792 update_cfs_load(cfs_rq, 0);
793 update_cfs_shares(cfs_rq, 0);
794 }
795}
796# else /* CONFIG_SMP */
797static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
798{
799}
800
801static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg,
802 long weight_delta)
803{
804 return tg->shares;
805}
806
807static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq)
808{
809}
810# endif /* CONFIG_SMP */
765static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, 811static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
766 unsigned long weight) 812 unsigned long weight)
767{ 813{
@@ -782,41 +828,20 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta)
782{ 828{
783 struct task_group *tg; 829 struct task_group *tg;
784 struct sched_entity *se; 830 struct sched_entity *se;
785 long load_weight, load, shares; 831 long shares;
786
787 if (!cfs_rq)
788 return;
789 832
790 tg = cfs_rq->tg; 833 tg = cfs_rq->tg;
791 se = tg->se[cpu_of(rq_of(cfs_rq))]; 834 se = tg->se[cpu_of(rq_of(cfs_rq))];
792 if (!se) 835 if (!se)
793 return; 836 return;
794 837#ifndef CONFIG_SMP
795 load = cfs_rq->load.weight + weight_delta; 838 if (likely(se->load.weight == tg->shares))
796 839 return;
797 load_weight = atomic_read(&tg->load_weight); 840#endif
798 load_weight -= cfs_rq->load_contribution; 841 shares = calc_cfs_shares(cfs_rq, tg, weight_delta);
799 load_weight += load;
800
801 shares = (tg->shares * load);
802 if (load_weight)
803 shares /= load_weight;
804
805 if (shares < MIN_SHARES)
806 shares = MIN_SHARES;
807 if (shares > tg->shares)
808 shares = tg->shares;
809 842
810 reweight_entity(cfs_rq_of(se), se, shares); 843 reweight_entity(cfs_rq_of(se), se, shares);
811} 844}
812
813static void update_entity_shares_tick(struct cfs_rq *cfs_rq)
814{
815 if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) {
816 update_cfs_load(cfs_rq, 0);
817 update_cfs_shares(cfs_rq, 0);
818 }
819}
820#else /* CONFIG_FAIR_GROUP_SCHED */ 845#else /* CONFIG_FAIR_GROUP_SCHED */
821static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) 846static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update)
822{ 847{
@@ -1062,6 +1087,9 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
1062 struct sched_entity *se = __pick_next_entity(cfs_rq); 1087 struct sched_entity *se = __pick_next_entity(cfs_rq);
1063 s64 delta = curr->vruntime - se->vruntime; 1088 s64 delta = curr->vruntime - se->vruntime;
1064 1089
1090 if (delta < 0)
1091 return;
1092
1065 if (delta > ideal_runtime) 1093 if (delta > ideal_runtime)
1066 resched_task(rq_of(cfs_rq)->curr); 1094 resched_task(rq_of(cfs_rq)->curr);
1067 } 1095 }
@@ -1362,27 +1390,27 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
1362 return wl; 1390 return wl;
1363 1391
1364 for_each_sched_entity(se) { 1392 for_each_sched_entity(se) {
1365 long S, rw, s, a, b; 1393 long lw, w;
1366 1394
1367 S = se->my_q->tg->shares; 1395 tg = se->my_q->tg;
1368 s = se->load.weight; 1396 w = se->my_q->load.weight;
1369 rw = se->my_q->load.weight;
1370 1397
1371 a = S*(rw + wl); 1398 /* use this cpu's instantaneous contribution */
1372 b = S*rw + s*wg; 1399 lw = atomic_read(&tg->load_weight);
1400 lw -= se->my_q->load_contribution;
1401 lw += w + wg;
1373 1402
1374 wl = s*(a-b); 1403 wl += w;
1375 1404
1376 if (likely(b)) 1405 if (lw > 0 && wl < lw)
1377 wl /= b; 1406 wl = (wl * tg->shares) / lw;
1407 else
1408 wl = tg->shares;
1378 1409
1379 /* 1410 /* zero point is MIN_SHARES */
1380 * Assume the group is already running and will 1411 if (wl < MIN_SHARES)
1381 * thus already be accounted for in the weight. 1412 wl = MIN_SHARES;
1382 * 1413 wl -= se->load.weight;
1383 * That is, moving shares between CPUs, does not
1384 * alter the group weight.
1385 */
1386 wg = 0; 1414 wg = 0;
1387 } 1415 }
1388 1416
@@ -1401,7 +1429,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
1401 1429
1402static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) 1430static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1403{ 1431{
1404 unsigned long this_load, load; 1432 s64 this_load, load;
1405 int idx, this_cpu, prev_cpu; 1433 int idx, this_cpu, prev_cpu;
1406 unsigned long tl_per_task; 1434 unsigned long tl_per_task;
1407 struct task_group *tg; 1435 struct task_group *tg;
@@ -1440,8 +1468,8 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
1440 * Otherwise check if either cpus are near enough in load to allow this 1468 * Otherwise check if either cpus are near enough in load to allow this
1441 * task to be woken on this_cpu. 1469 * task to be woken on this_cpu.
1442 */ 1470 */
1443 if (this_load) { 1471 if (this_load > 0) {
1444 unsigned long this_eff_load, prev_eff_load; 1472 s64 this_eff_load, prev_eff_load;
1445 1473
1446 this_eff_load = 100; 1474 this_eff_load = 100;
1447 this_eff_load *= power_of(prev_cpu); 1475 this_eff_load *= power_of(prev_cpu);
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index c914ec747ca6..ad6267714c84 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -625,7 +625,7 @@ static void update_curr_rt(struct rq *rq)
625 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 625 struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
626 u64 delta_exec; 626 u64 delta_exec;
627 627
628 if (!task_has_rt_policy(curr)) 628 if (curr->sched_class != &rt_sched_class)
629 return; 629 return;
630 630
631 delta_exec = rq->clock_task - curr->se.exec_start; 631 delta_exec = rq->clock_task - curr->se.exec_start;
diff --git a/kernel/smp.c b/kernel/smp.c
index 4ec30e069987..9910744f0856 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -194,23 +194,52 @@ void generic_smp_call_function_interrupt(void)
194 */ 194 */
195 list_for_each_entry_rcu(data, &call_function.queue, csd.list) { 195 list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
196 int refs; 196 int refs;
197 void (*func) (void *info);
197 198
198 if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) 199 /*
200 * Since we walk the list without any locks, we might
201 * see an entry that was completed, removed from the
202 * list and is in the process of being reused.
203 *
204 * We must check that the cpu is in the cpumask before
205 * checking the refs, and both must be set before
206 * executing the callback on this cpu.
207 */
208
209 if (!cpumask_test_cpu(cpu, data->cpumask))
210 continue;
211
212 smp_rmb();
213
214 if (atomic_read(&data->refs) == 0)
199 continue; 215 continue;
200 216
217 func = data->csd.func; /* for later warn */
201 data->csd.func(data->csd.info); 218 data->csd.func(data->csd.info);
202 219
220 /*
221 * If the cpu mask is not still set then it enabled interrupts,
222 * we took another smp interrupt, and executed the function
223 * twice on this cpu. In theory that copy decremented refs.
224 */
225 if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) {
226 WARN(1, "%pS enabled interrupts and double executed\n",
227 func);
228 continue;
229 }
230
203 refs = atomic_dec_return(&data->refs); 231 refs = atomic_dec_return(&data->refs);
204 WARN_ON(refs < 0); 232 WARN_ON(refs < 0);
205 if (!refs) {
206 raw_spin_lock(&call_function.lock);
207 list_del_rcu(&data->csd.list);
208 raw_spin_unlock(&call_function.lock);
209 }
210 233
211 if (refs) 234 if (refs)
212 continue; 235 continue;
213 236
237 WARN_ON(!cpumask_empty(data->cpumask));
238
239 raw_spin_lock(&call_function.lock);
240 list_del_rcu(&data->csd.list);
241 raw_spin_unlock(&call_function.lock);
242
214 csd_unlock(&data->csd); 243 csd_unlock(&data->csd);
215 } 244 }
216 245
@@ -430,7 +459,7 @@ void smp_call_function_many(const struct cpumask *mask,
430 * can't happen. 459 * can't happen.
431 */ 460 */
432 WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() 461 WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
433 && !oops_in_progress); 462 && !oops_in_progress && !early_boot_irqs_disabled);
434 463
435 /* So, what's a CPU they want? Ignoring this one. */ 464 /* So, what's a CPU they want? Ignoring this one. */
436 cpu = cpumask_first_and(mask, cpu_online_mask); 465 cpu = cpumask_first_and(mask, cpu_online_mask);
@@ -454,11 +483,21 @@ void smp_call_function_many(const struct cpumask *mask,
454 483
455 data = &__get_cpu_var(cfd_data); 484 data = &__get_cpu_var(cfd_data);
456 csd_lock(&data->csd); 485 csd_lock(&data->csd);
486 BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
457 487
458 data->csd.func = func; 488 data->csd.func = func;
459 data->csd.info = info; 489 data->csd.info = info;
460 cpumask_and(data->cpumask, mask, cpu_online_mask); 490 cpumask_and(data->cpumask, mask, cpu_online_mask);
461 cpumask_clear_cpu(this_cpu, data->cpumask); 491 cpumask_clear_cpu(this_cpu, data->cpumask);
492
493 /*
494 * To ensure the interrupt handler gets an complete view
495 * we order the cpumask and refs writes and order the read
496 * of them in the interrupt handler. In addition we may
497 * only clear our own cpu bit from the mask.
498 */
499 smp_wmb();
500
462 atomic_set(&data->refs, cpumask_weight(data->cpumask)); 501 atomic_set(&data->refs, cpumask_weight(data->cpumask));
463 502
464 raw_spin_lock_irqsave(&call_function.lock, flags); 503 raw_spin_lock_irqsave(&call_function.lock, flags);
@@ -533,17 +572,20 @@ void ipi_call_unlock_irq(void)
533#endif /* USE_GENERIC_SMP_HELPERS */ 572#endif /* USE_GENERIC_SMP_HELPERS */
534 573
535/* 574/*
536 * Call a function on all processors 575 * Call a function on all processors. May be used during early boot while
576 * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
577 * of local_irq_disable/enable().
537 */ 578 */
538int on_each_cpu(void (*func) (void *info), void *info, int wait) 579int on_each_cpu(void (*func) (void *info), void *info, int wait)
539{ 580{
581 unsigned long flags;
540 int ret = 0; 582 int ret = 0;
541 583
542 preempt_disable(); 584 preempt_disable();
543 ret = smp_call_function(func, info, wait); 585 ret = smp_call_function(func, info, wait);
544 local_irq_disable(); 586 local_irq_save(flags);
545 func(info); 587 func(info);
546 local_irq_enable(); 588 local_irq_restore(flags);
547 preempt_enable(); 589 preempt_enable();
548 return ret; 590 return ret;
549} 591}
diff --git a/kernel/srcu.c b/kernel/srcu.c
index 98d8c1e80edb..73ce23feaea9 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
@@ -156,6 +156,16 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx)
156EXPORT_SYMBOL_GPL(__srcu_read_unlock); 156EXPORT_SYMBOL_GPL(__srcu_read_unlock);
157 157
158/* 158/*
159 * We use an adaptive strategy for synchronize_srcu() and especially for
160 * synchronize_srcu_expedited(). We spin for a fixed time period
161 * (defined below) to allow SRCU readers to exit their read-side critical
162 * sections. If there are still some readers after 10 microseconds,
163 * we repeatedly block for 1-millisecond time periods. This approach
164 * has done well in testing, so there is no need for a config parameter.
165 */
166#define SYNCHRONIZE_SRCU_READER_DELAY 10
167
168/*
159 * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). 169 * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
160 */ 170 */
161static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) 171static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
@@ -207,11 +217,12 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void))
207 * will have finished executing. We initially give readers 217 * will have finished executing. We initially give readers
208 * an arbitrarily chosen 10 microseconds to get out of their 218 * an arbitrarily chosen 10 microseconds to get out of their
209 * SRCU read-side critical sections, then loop waiting 1/HZ 219 * SRCU read-side critical sections, then loop waiting 1/HZ
210 * seconds per iteration. 220 * seconds per iteration. The 10-microsecond value has done
221 * very well in testing.
211 */ 222 */
212 223
213 if (srcu_readers_active_idx(sp, idx)) 224 if (srcu_readers_active_idx(sp, idx))
214 udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY); 225 udelay(SYNCHRONIZE_SRCU_READER_DELAY);
215 while (srcu_readers_active_idx(sp, idx)) 226 while (srcu_readers_active_idx(sp, idx))
216 schedule_timeout_interruptible(1); 227 schedule_timeout_interruptible(1);
217 228
diff --git a/kernel/sys.c b/kernel/sys.c
index 31b71a276b40..18da702ec813 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1385,7 +1385,8 @@ static int check_prlimit_permission(struct task_struct *task)
1385 const struct cred *cred = current_cred(), *tcred; 1385 const struct cred *cred = current_cred(), *tcred;
1386 1386
1387 tcred = __task_cred(task); 1387 tcred = __task_cred(task);
1388 if ((cred->uid != tcred->euid || 1388 if (current != task &&
1389 (cred->uid != tcred->euid ||
1389 cred->uid != tcred->suid || 1390 cred->uid != tcred->suid ||
1390 cred->uid != tcred->uid || 1391 cred->uid != tcred->uid ||
1391 cred->gid != tcred->egid || 1392 cred->gid != tcred->egid ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index bc86bb32e126..0f1bd83db985 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -170,7 +170,8 @@ static int proc_taint(struct ctl_table *table, int write,
170#endif 170#endif
171 171
172#ifdef CONFIG_MAGIC_SYSRQ 172#ifdef CONFIG_MAGIC_SYSRQ
173static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */ 173/* Note: sysrq code uses it's own private copy */
174static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
174 175
175static int sysrq_sysctl_handler(ctl_table *table, int write, 176static int sysrq_sysctl_handler(ctl_table *table, int write,
176 void __user *buffer, size_t *lenp, 177 void __user *buffer, size_t *lenp,
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c50a034de30f..6519cf62d9cd 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -113,7 +113,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time);
113 * @shift: pointer to shift variable 113 * @shift: pointer to shift variable
114 * @from: frequency to convert from 114 * @from: frequency to convert from
115 * @to: frequency to convert to 115 * @to: frequency to convert to
116 * @minsec: guaranteed runtime conversion range in seconds 116 * @maxsec: guaranteed runtime conversion range in seconds
117 * 117 *
118 * The function evaluates the shift/mult pair for the scaled math 118 * The function evaluates the shift/mult pair for the scaled math
119 * operations of clocksources and clockevents. 119 * operations of clocksources and clockevents.
@@ -122,7 +122,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time);
122 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock 122 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
123 * event @to is the counter frequency and @from is NSEC_PER_SEC. 123 * event @to is the counter frequency and @from is NSEC_PER_SEC.
124 * 124 *
125 * The @minsec conversion range argument controls the time frame in 125 * The @maxsec conversion range argument controls the time frame in
126 * seconds which must be covered by the runtime conversion with the 126 * seconds which must be covered by the runtime conversion with the
127 * calculated mult and shift factors. This guarantees that no 64bit 127 * calculated mult and shift factors. This guarantees that no 64bit
128 * overflow happens when the input value of the conversion is 128 * overflow happens when the input value of the conversion is
@@ -131,7 +131,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time);
131 * factors. 131 * factors.
132 */ 132 */
133void 133void
134clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec) 134clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
135{ 135{
136 u64 tmp; 136 u64 tmp;
137 u32 sft, sftacc= 32; 137 u32 sft, sftacc= 32;
@@ -140,7 +140,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
140 * Calculate the shift factor which is limiting the conversion 140 * Calculate the shift factor which is limiting the conversion
141 * range: 141 * range:
142 */ 142 */
143 tmp = ((u64)minsec * from) >> 32; 143 tmp = ((u64)maxsec * from) >> 32;
144 while (tmp) { 144 while (tmp) {
145 tmp >>=1; 145 tmp >>=1;
146 sftacc--; 146 sftacc--;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3e216e01bbd1..c55ea2433471 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -642,8 +642,7 @@ static void tick_nohz_switch_to_nohz(void)
642 } 642 }
643 local_irq_enable(); 643 local_irq_enable();
644 644
645 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", 645 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id());
646 smp_processor_id());
647} 646}
648 647
649/* 648/*
@@ -795,8 +794,10 @@ void tick_setup_sched_timer(void)
795 } 794 }
796 795
797#ifdef CONFIG_NO_HZ 796#ifdef CONFIG_NO_HZ
798 if (tick_nohz_enabled) 797 if (tick_nohz_enabled) {
799 ts->nohz_mode = NOHZ_MODE_HIGHRES; 798 ts->nohz_mode = NOHZ_MODE_HIGHRES;
799 printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id());
800 }
800#endif 801#endif
801} 802}
802#endif /* HIGH_RES_TIMERS */ 803#endif /* HIGH_RES_TIMERS */
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5536aaf3ba36..d27c7562902c 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -49,7 +49,7 @@ struct timekeeper {
49 u32 mult; 49 u32 mult;
50}; 50};
51 51
52struct timekeeper timekeeper; 52static struct timekeeper timekeeper;
53 53
54/** 54/**
55 * timekeeper_setup_internals - Set up internals to use clocksource clock. 55 * timekeeper_setup_internals - Set up internals to use clocksource clock.
@@ -164,7 +164,7 @@ static struct timespec total_sleep_time;
164/* 164/*
165 * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. 165 * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock.
166 */ 166 */
167struct timespec raw_time; 167static struct timespec raw_time;
168 168
169/* flag for if timekeeping is suspended */ 169/* flag for if timekeeping is suspended */
170int __read_mostly timekeeping_suspended; 170int __read_mostly timekeeping_suspended;
diff --git a/kernel/timer.c b/kernel/timer.c
index 43ca9936f2d0..d53ce66daea0 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -969,10 +969,14 @@ EXPORT_SYMBOL(try_to_del_timer_sync);
969int del_timer_sync(struct timer_list *timer) 969int del_timer_sync(struct timer_list *timer)
970{ 970{
971#ifdef CONFIG_LOCKDEP 971#ifdef CONFIG_LOCKDEP
972 unsigned long flags;
973
974 raw_local_irq_save(flags);
972 local_bh_disable(); 975 local_bh_disable();
973 lock_map_acquire(&timer->lockdep_map); 976 lock_map_acquire(&timer->lockdep_map);
974 lock_map_release(&timer->lockdep_map); 977 lock_map_release(&timer->lockdep_map);
975 local_bh_enable(); 978 _local_bh_enable();
979 raw_local_irq_restore(flags);
976#endif 980#endif
977 /* 981 /*
978 * don't use it in hardirq context, because it 982 * don't use it in hardirq context, because it
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 153562d0b93c..d95721f33702 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -138,6 +138,13 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
138 !blk_tracer_enabled)) 138 !blk_tracer_enabled))
139 return; 139 return;
140 140
141 /*
142 * If the BLK_TC_NOTIFY action mask isn't set, don't send any note
143 * message to the trace.
144 */
145 if (!(bt->act_mask & BLK_TC_NOTIFY))
146 return;
147
141 local_irq_save(flags); 148 local_irq_save(flags);
142 buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); 149 buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
143 va_start(args, fmt); 150 va_start(args, fmt);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 35fde09b81de..5f499e0438a4 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1284,7 +1284,7 @@ trace_create_file_ops(struct module *mod)
1284static void trace_module_add_events(struct module *mod) 1284static void trace_module_add_events(struct module *mod)
1285{ 1285{
1286 struct ftrace_module_file_ops *file_ops = NULL; 1286 struct ftrace_module_file_ops *file_ops = NULL;
1287 struct ftrace_event_call *call, *start, *end; 1287 struct ftrace_event_call **call, **start, **end;
1288 1288
1289 start = mod->trace_events; 1289 start = mod->trace_events;
1290 end = mod->trace_events + mod->num_trace_events; 1290 end = mod->trace_events + mod->num_trace_events;
@@ -1297,7 +1297,7 @@ static void trace_module_add_events(struct module *mod)
1297 return; 1297 return;
1298 1298
1299 for_each_event(call, start, end) { 1299 for_each_event(call, start, end) {
1300 __trace_add_event_call(call, mod, 1300 __trace_add_event_call(*call, mod,
1301 &file_ops->id, &file_ops->enable, 1301 &file_ops->id, &file_ops->enable,
1302 &file_ops->filter, &file_ops->format); 1302 &file_ops->filter, &file_ops->format);
1303 } 1303 }
@@ -1367,8 +1367,8 @@ static struct notifier_block trace_module_nb = {
1367 .priority = 0, 1367 .priority = 0,
1368}; 1368};
1369 1369
1370extern struct ftrace_event_call __start_ftrace_events[]; 1370extern struct ftrace_event_call *__start_ftrace_events[];
1371extern struct ftrace_event_call __stop_ftrace_events[]; 1371extern struct ftrace_event_call *__stop_ftrace_events[];
1372 1372
1373static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; 1373static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1374 1374
@@ -1384,7 +1384,7 @@ __setup("trace_event=", setup_trace_event);
1384 1384
1385static __init int event_trace_init(void) 1385static __init int event_trace_init(void)
1386{ 1386{
1387 struct ftrace_event_call *call; 1387 struct ftrace_event_call **call;
1388 struct dentry *d_tracer; 1388 struct dentry *d_tracer;
1389 struct dentry *entry; 1389 struct dentry *entry;
1390 struct dentry *d_events; 1390 struct dentry *d_events;
@@ -1430,7 +1430,7 @@ static __init int event_trace_init(void)
1430 pr_warning("tracing: Failed to allocate common fields"); 1430 pr_warning("tracing: Failed to allocate common fields");
1431 1431
1432 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { 1432 for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1433 __trace_add_event_call(call, NULL, &ftrace_event_id_fops, 1433 __trace_add_event_call(*call, NULL, &ftrace_event_id_fops,
1434 &ftrace_enable_fops, 1434 &ftrace_enable_fops,
1435 &ftrace_event_filter_fops, 1435 &ftrace_event_filter_fops,
1436 &ftrace_event_format_fops); 1436 &ftrace_event_format_fops);
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4b74d71705c0..bbeec31e0ae3 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -161,13 +161,13 @@ struct ftrace_event_class event_class_ftrace_##call = { \
161 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ 161 .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
162}; \ 162}; \
163 \ 163 \
164struct ftrace_event_call __used \ 164struct ftrace_event_call __used event_##call = { \
165__attribute__((__aligned__(4))) \
166__attribute__((section("_ftrace_events"))) event_##call = { \
167 .name = #call, \ 165 .name = #call, \
168 .event.type = etype, \ 166 .event.type = etype, \
169 .class = &event_class_ftrace_##call, \ 167 .class = &event_class_ftrace_##call, \
170 .print_fmt = print, \ 168 .print_fmt = print, \
171}; \ 169}; \
170struct ftrace_event_call __used \
171__attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
172 172
173#include "trace_entries.h" 173#include "trace_entries.h"
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 5cf8c602b880..92b6e1e12d98 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -453,14 +453,6 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
453 * Stubs: 453 * Stubs:
454 */ 454 */
455 455
456void early_boot_irqs_off(void)
457{
458}
459
460void early_boot_irqs_on(void)
461{
462}
463
464void trace_softirqs_on(unsigned long ip) 456void trace_softirqs_on(unsigned long ip)
465{ 457{
466} 458}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index bac752f0cfb5..5c9fe08d2093 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -23,9 +23,6 @@ static int syscall_exit_register(struct ftrace_event_call *event,
23static int syscall_enter_define_fields(struct ftrace_event_call *call); 23static int syscall_enter_define_fields(struct ftrace_event_call *call);
24static int syscall_exit_define_fields(struct ftrace_event_call *call); 24static int syscall_exit_define_fields(struct ftrace_event_call *call);
25 25
26/* All syscall exit events have the same fields */
27static LIST_HEAD(syscall_exit_fields);
28
29static struct list_head * 26static struct list_head *
30syscall_get_enter_fields(struct ftrace_event_call *call) 27syscall_get_enter_fields(struct ftrace_event_call *call)
31{ 28{
@@ -34,50 +31,45 @@ syscall_get_enter_fields(struct ftrace_event_call *call)
34 return &entry->enter_fields; 31 return &entry->enter_fields;
35} 32}
36 33
37static struct list_head *
38syscall_get_exit_fields(struct ftrace_event_call *call)
39{
40 return &syscall_exit_fields;
41}
42
43struct trace_event_functions enter_syscall_print_funcs = { 34struct trace_event_functions enter_syscall_print_funcs = {
44 .trace = print_syscall_enter, 35 .trace = print_syscall_enter,
45}; 36};
46 37
47struct trace_event_functions exit_syscall_print_funcs = { 38struct trace_event_functions exit_syscall_print_funcs = {
48 .trace = print_syscall_exit, 39 .trace = print_syscall_exit,
49}; 40};
50 41
51struct ftrace_event_class event_class_syscall_enter = { 42struct ftrace_event_class event_class_syscall_enter = {
52 .system = "syscalls", 43 .system = "syscalls",
53 .reg = syscall_enter_register, 44 .reg = syscall_enter_register,
54 .define_fields = syscall_enter_define_fields, 45 .define_fields = syscall_enter_define_fields,
55 .get_fields = syscall_get_enter_fields, 46 .get_fields = syscall_get_enter_fields,
56 .raw_init = init_syscall_trace, 47 .raw_init = init_syscall_trace,
57}; 48};
58 49
59struct ftrace_event_class event_class_syscall_exit = { 50struct ftrace_event_class event_class_syscall_exit = {
60 .system = "syscalls", 51 .system = "syscalls",
61 .reg = syscall_exit_register, 52 .reg = syscall_exit_register,
62 .define_fields = syscall_exit_define_fields, 53 .define_fields = syscall_exit_define_fields,
63 .get_fields = syscall_get_exit_fields, 54 .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
64 .raw_init = init_syscall_trace, 55 .raw_init = init_syscall_trace,
65}; 56};
66 57
67extern unsigned long __start_syscalls_metadata[]; 58extern struct syscall_metadata *__start_syscalls_metadata[];
68extern unsigned long __stop_syscalls_metadata[]; 59extern struct syscall_metadata *__stop_syscalls_metadata[];
69 60
70static struct syscall_metadata **syscalls_metadata; 61static struct syscall_metadata **syscalls_metadata;
71 62
72static struct syscall_metadata *find_syscall_meta(unsigned long syscall) 63static __init struct syscall_metadata *
64find_syscall_meta(unsigned long syscall)
73{ 65{
74 struct syscall_metadata *start; 66 struct syscall_metadata **start;
75 struct syscall_metadata *stop; 67 struct syscall_metadata **stop;
76 char str[KSYM_SYMBOL_LEN]; 68 char str[KSYM_SYMBOL_LEN];
77 69
78 70
79 start = (struct syscall_metadata *)__start_syscalls_metadata; 71 start = __start_syscalls_metadata;
80 stop = (struct syscall_metadata *)__stop_syscalls_metadata; 72 stop = __stop_syscalls_metadata;
81 kallsyms_lookup(syscall, NULL, NULL, NULL, str); 73 kallsyms_lookup(syscall, NULL, NULL, NULL, str);
82 74
83 for ( ; start < stop; start++) { 75 for ( ; start < stop; start++) {
@@ -87,8 +79,8 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall)
87 * with "SyS" instead of "sys", leading to an unwanted 79 * with "SyS" instead of "sys", leading to an unwanted
88 * mismatch. 80 * mismatch.
89 */ 81 */
90 if (start->name && !strcmp(start->name + 3, str + 3)) 82 if ((*start)->name && !strcmp((*start)->name + 3, str + 3))
91 return start; 83 return *start;
92 } 84 }
93 return NULL; 85 return NULL;
94} 86}
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index e95ee7f31d43..68187af4889e 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -27,8 +27,8 @@
27#include <linux/sched.h> 27#include <linux/sched.h>
28#include <linux/jump_label.h> 28#include <linux/jump_label.h>
29 29
30extern struct tracepoint __start___tracepoints[]; 30extern struct tracepoint * const __start___tracepoints_ptrs[];
31extern struct tracepoint __stop___tracepoints[]; 31extern struct tracepoint * const __stop___tracepoints_ptrs[];
32 32
33/* Set to 1 to enable tracepoint debug output */ 33/* Set to 1 to enable tracepoint debug output */
34static const int tracepoint_debug; 34static const int tracepoint_debug;
@@ -298,10 +298,10 @@ static void disable_tracepoint(struct tracepoint *elem)
298 * 298 *
299 * Updates the probe callback corresponding to a range of tracepoints. 299 * Updates the probe callback corresponding to a range of tracepoints.
300 */ 300 */
301void 301void tracepoint_update_probe_range(struct tracepoint * const *begin,
302tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) 302 struct tracepoint * const *end)
303{ 303{
304 struct tracepoint *iter; 304 struct tracepoint * const *iter;
305 struct tracepoint_entry *mark_entry; 305 struct tracepoint_entry *mark_entry;
306 306
307 if (!begin) 307 if (!begin)
@@ -309,12 +309,12 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end)
309 309
310 mutex_lock(&tracepoints_mutex); 310 mutex_lock(&tracepoints_mutex);
311 for (iter = begin; iter < end; iter++) { 311 for (iter = begin; iter < end; iter++) {
312 mark_entry = get_tracepoint(iter->name); 312 mark_entry = get_tracepoint((*iter)->name);
313 if (mark_entry) { 313 if (mark_entry) {
314 set_tracepoint(&mark_entry, iter, 314 set_tracepoint(&mark_entry, *iter,
315 !!mark_entry->refcount); 315 !!mark_entry->refcount);
316 } else { 316 } else {
317 disable_tracepoint(iter); 317 disable_tracepoint(*iter);
318 } 318 }
319 } 319 }
320 mutex_unlock(&tracepoints_mutex); 320 mutex_unlock(&tracepoints_mutex);
@@ -326,8 +326,8 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end)
326static void tracepoint_update_probes(void) 326static void tracepoint_update_probes(void)
327{ 327{
328 /* Core kernel tracepoints */ 328 /* Core kernel tracepoints */
329 tracepoint_update_probe_range(__start___tracepoints, 329 tracepoint_update_probe_range(__start___tracepoints_ptrs,
330 __stop___tracepoints); 330 __stop___tracepoints_ptrs);
331 /* tracepoints in modules. */ 331 /* tracepoints in modules. */
332 module_update_tracepoints(); 332 module_update_tracepoints();
333} 333}
@@ -514,8 +514,8 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_update_all);
514 * Will return the first tracepoint in the range if the input tracepoint is 514 * Will return the first tracepoint in the range if the input tracepoint is
515 * NULL. 515 * NULL.
516 */ 516 */
517int tracepoint_get_iter_range(struct tracepoint **tracepoint, 517int tracepoint_get_iter_range(struct tracepoint * const **tracepoint,
518 struct tracepoint *begin, struct tracepoint *end) 518 struct tracepoint * const *begin, struct tracepoint * const *end)
519{ 519{
520 if (!*tracepoint && begin != end) { 520 if (!*tracepoint && begin != end) {
521 *tracepoint = begin; 521 *tracepoint = begin;
@@ -534,7 +534,8 @@ static void tracepoint_get_iter(struct tracepoint_iter *iter)
534 /* Core kernel tracepoints */ 534 /* Core kernel tracepoints */
535 if (!iter->module) { 535 if (!iter->module) {
536 found = tracepoint_get_iter_range(&iter->tracepoint, 536 found = tracepoint_get_iter_range(&iter->tracepoint,
537 __start___tracepoints, __stop___tracepoints); 537 __start___tracepoints_ptrs,
538 __stop___tracepoints_ptrs);
538 if (found) 539 if (found)
539 goto end; 540 goto end;
540 } 541 }
@@ -585,8 +586,8 @@ int tracepoint_module_notify(struct notifier_block *self,
585 switch (val) { 586 switch (val) {
586 case MODULE_STATE_COMING: 587 case MODULE_STATE_COMING:
587 case MODULE_STATE_GOING: 588 case MODULE_STATE_GOING:
588 tracepoint_update_probe_range(mod->tracepoints, 589 tracepoint_update_probe_range(mod->tracepoints_ptrs,
589 mod->tracepoints + mod->num_tracepoints); 590 mod->tracepoints_ptrs + mod->num_tracepoints);
590 break; 591 break;
591 } 592 }
592 return 0; 593 return 0;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index d7ebdf4cea98..f37f974aa81b 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -27,7 +27,7 @@
27#include <asm/irq_regs.h> 27#include <asm/irq_regs.h>
28#include <linux/perf_event.h> 28#include <linux/perf_event.h>
29 29
30int watchdog_enabled; 30int watchdog_enabled = 1;
31int __read_mostly softlockup_thresh = 60; 31int __read_mostly softlockup_thresh = 60;
32 32
33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); 33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -43,9 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
44#endif 44#endif
45 45
46static int no_watchdog;
47
48
49/* boot commands */ 46/* boot commands */
50/* 47/*
51 * Should we panic when a soft-lockup or hard-lockup occurs: 48 * Should we panic when a soft-lockup or hard-lockup occurs:
@@ -58,7 +55,7 @@ static int __init hardlockup_panic_setup(char *str)
58 if (!strncmp(str, "panic", 5)) 55 if (!strncmp(str, "panic", 5))
59 hardlockup_panic = 1; 56 hardlockup_panic = 1;
60 else if (!strncmp(str, "0", 1)) 57 else if (!strncmp(str, "0", 1))
61 no_watchdog = 1; 58 watchdog_enabled = 0;
62 return 1; 59 return 1;
63} 60}
64__setup("nmi_watchdog=", hardlockup_panic_setup); 61__setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -77,7 +74,7 @@ __setup("softlockup_panic=", softlockup_panic_setup);
77 74
78static int __init nowatchdog_setup(char *str) 75static int __init nowatchdog_setup(char *str)
79{ 76{
80 no_watchdog = 1; 77 watchdog_enabled = 0;
81 return 1; 78 return 1;
82} 79}
83__setup("nowatchdog", nowatchdog_setup); 80__setup("nowatchdog", nowatchdog_setup);
@@ -85,7 +82,7 @@ __setup("nowatchdog", nowatchdog_setup);
85/* deprecated */ 82/* deprecated */
86static int __init nosoftlockup_setup(char *str) 83static int __init nosoftlockup_setup(char *str)
87{ 84{
88 no_watchdog = 1; 85 watchdog_enabled = 0;
89 return 1; 86 return 1;
90} 87}
91__setup("nosoftlockup", nosoftlockup_setup); 88__setup("nosoftlockup", nosoftlockup_setup);
@@ -432,9 +429,6 @@ static int watchdog_enable(int cpu)
432 wake_up_process(p); 429 wake_up_process(p);
433 } 430 }
434 431
435 /* if any cpu succeeds, watchdog is considered enabled for the system */
436 watchdog_enabled = 1;
437
438 return 0; 432 return 0;
439} 433}
440 434
@@ -462,12 +456,16 @@ static void watchdog_disable(int cpu)
462static void watchdog_enable_all_cpus(void) 456static void watchdog_enable_all_cpus(void)
463{ 457{
464 int cpu; 458 int cpu;
465 int result = 0; 459
460 watchdog_enabled = 0;
466 461
467 for_each_online_cpu(cpu) 462 for_each_online_cpu(cpu)
468 result += watchdog_enable(cpu); 463 if (!watchdog_enable(cpu))
464 /* if any cpu succeeds, watchdog is considered
465 enabled for the system */
466 watchdog_enabled = 1;
469 467
470 if (result) 468 if (!watchdog_enabled)
471 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); 469 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
472 470
473} 471}
@@ -476,9 +474,6 @@ static void watchdog_disable_all_cpus(void)
476{ 474{
477 int cpu; 475 int cpu;
478 476
479 if (no_watchdog)
480 return;
481
482 for_each_online_cpu(cpu) 477 for_each_online_cpu(cpu)
483 watchdog_disable(cpu); 478 watchdog_disable(cpu);
484 479
@@ -498,10 +493,12 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write,
498{ 493{
499 proc_dointvec(table, write, buffer, length, ppos); 494 proc_dointvec(table, write, buffer, length, ppos);
500 495
501 if (watchdog_enabled) 496 if (write) {
502 watchdog_enable_all_cpus(); 497 if (watchdog_enabled)
503 else 498 watchdog_enable_all_cpus();
504 watchdog_disable_all_cpus(); 499 else
500 watchdog_disable_all_cpus();
501 }
505 return 0; 502 return 0;
506} 503}
507 504
@@ -530,7 +527,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
530 break; 527 break;
531 case CPU_ONLINE: 528 case CPU_ONLINE:
532 case CPU_ONLINE_FROZEN: 529 case CPU_ONLINE_FROZEN:
533 err = watchdog_enable(hotcpu); 530 if (watchdog_enabled)
531 err = watchdog_enable(hotcpu);
534 break; 532 break;
535#ifdef CONFIG_HOTPLUG_CPU 533#ifdef CONFIG_HOTPLUG_CPU
536 case CPU_UP_CANCELED: 534 case CPU_UP_CANCELED:
@@ -555,9 +553,6 @@ void __init lockup_detector_init(void)
555 void *cpu = (void *)(long)smp_processor_id(); 553 void *cpu = (void *)(long)smp_processor_id();
556 int err; 554 int err;
557 555
558 if (no_watchdog)
559 return;
560
561 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 556 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
562 WARN_ON(notifier_to_errno(err)); 557 WARN_ON(notifier_to_errno(err));
563 558
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8ee6ec82f88a..11869faa6819 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -768,7 +768,11 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
768 768
769 worker->flags &= ~flags; 769 worker->flags &= ~flags;
770 770
771 /* if transitioning out of NOT_RUNNING, increment nr_running */ 771 /*
772 * If transitioning out of NOT_RUNNING, increment nr_running. Note
773 * that the nested NOT_RUNNING is not a noop. NOT_RUNNING is mask
774 * of multiple flags, not a single flag.
775 */
772 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) 776 if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING))
773 if (!(worker->flags & WORKER_NOT_RUNNING)) 777 if (!(worker->flags & WORKER_NOT_RUNNING))
774 atomic_inc(get_gcwq_nr_running(gcwq->cpu)); 778 atomic_inc(get_gcwq_nr_running(gcwq->cpu));
@@ -1840,7 +1844,7 @@ __acquires(&gcwq->lock)
1840 spin_unlock_irq(&gcwq->lock); 1844 spin_unlock_irq(&gcwq->lock);
1841 1845
1842 work_clear_pending(work); 1846 work_clear_pending(work);
1843 lock_map_acquire(&cwq->wq->lockdep_map); 1847 lock_map_acquire_read(&cwq->wq->lockdep_map);
1844 lock_map_acquire(&lockdep_map); 1848 lock_map_acquire(&lockdep_map);
1845 trace_workqueue_execute_start(work); 1849 trace_workqueue_execute_start(work);
1846 f(work); 1850 f(work);
@@ -2384,8 +2388,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
2384 insert_wq_barrier(cwq, barr, work, worker); 2388 insert_wq_barrier(cwq, barr, work, worker);
2385 spin_unlock_irq(&gcwq->lock); 2389 spin_unlock_irq(&gcwq->lock);
2386 2390
2387 lock_map_acquire(&cwq->wq->lockdep_map); 2391 /*
2392 * If @max_active is 1 or rescuer is in use, flushing another work
2393 * item on the same workqueue may lead to deadlock. Make sure the
2394 * flusher is not running on the same workqueue by verifying write
2395 * access.
2396 */
2397 if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER)
2398 lock_map_acquire(&cwq->wq->lockdep_map);
2399 else
2400 lock_map_acquire_read(&cwq->wq->lockdep_map);
2388 lock_map_release(&cwq->wq->lockdep_map); 2401 lock_map_release(&cwq->wq->lockdep_map);
2402
2389 return true; 2403 return true;
2390already_gone: 2404already_gone:
2391 spin_unlock_irq(&gcwq->lock); 2405 spin_unlock_irq(&gcwq->lock);