diff options
author | Jiri Kosina <jkosina@suse.cz> | 2011-02-15 04:24:31 -0500 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2011-02-15 04:24:31 -0500 |
commit | 0a9d59a2461477bd9ed143c01af9df3f8f00fa81 (patch) | |
tree | df997d1cfb0786427a0df1fbd6f0640fa4248cf4 /kernel | |
parent | a23ce6da9677d245aa0aadc99f4197030350ab54 (diff) | |
parent | 795abaf1e4e188c4171e3cd3dbb11a9fcacaf505 (diff) |
Merge branch 'master' into for-next
Diffstat (limited to 'kernel')
42 files changed, 681 insertions, 626 deletions
diff --git a/kernel/capability.c b/kernel/capability.c index 2f05303715a5..9e9385f132c8 100644 --- a/kernel/capability.c +++ b/kernel/capability.c | |||
@@ -306,7 +306,7 @@ int capable(int cap) | |||
306 | BUG(); | 306 | BUG(); |
307 | } | 307 | } |
308 | 308 | ||
309 | if (security_capable(cap) == 0) { | 309 | if (security_capable(current_cred(), cap) == 0) { |
310 | current->flags |= PF_SUPERPRIV; | 310 | current->flags |= PF_SUPERPRIV; |
311 | return 1; | 311 | return 1; |
312 | } | 312 | } |
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5c5f4cc2e99a..b24d7027b83c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); | |||
764 | */ | 764 | */ |
765 | 765 | ||
766 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); | 766 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, int mode); |
767 | static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *); | ||
767 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); | 768 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); |
768 | static int cgroup_populate_dir(struct cgroup *cgrp); | 769 | static int cgroup_populate_dir(struct cgroup *cgrp); |
769 | static const struct inode_operations cgroup_dir_inode_operations; | 770 | static const struct inode_operations cgroup_dir_inode_operations; |
@@ -860,6 +861,11 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
860 | iput(inode); | 861 | iput(inode); |
861 | } | 862 | } |
862 | 863 | ||
864 | static int cgroup_delete(const struct dentry *d) | ||
865 | { | ||
866 | return 1; | ||
867 | } | ||
868 | |||
863 | static void remove_dir(struct dentry *d) | 869 | static void remove_dir(struct dentry *d) |
864 | { | 870 | { |
865 | struct dentry *parent = dget(d->d_parent); | 871 | struct dentry *parent = dget(d->d_parent); |
@@ -910,7 +916,7 @@ static void cgroup_d_remove_dir(struct dentry *dentry) | |||
910 | 916 | ||
911 | parent = dentry->d_parent; | 917 | parent = dentry->d_parent; |
912 | spin_lock(&parent->d_lock); | 918 | spin_lock(&parent->d_lock); |
913 | spin_lock(&dentry->d_lock); | 919 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); |
914 | list_del_init(&dentry->d_u.d_child); | 920 | list_del_init(&dentry->d_u.d_child); |
915 | spin_unlock(&dentry->d_lock); | 921 | spin_unlock(&dentry->d_lock); |
916 | spin_unlock(&parent->d_lock); | 922 | spin_unlock(&parent->d_lock); |
@@ -1451,6 +1457,7 @@ static int cgroup_get_rootdir(struct super_block *sb) | |||
1451 | { | 1457 | { |
1452 | static const struct dentry_operations cgroup_dops = { | 1458 | static const struct dentry_operations cgroup_dops = { |
1453 | .d_iput = cgroup_diput, | 1459 | .d_iput = cgroup_diput, |
1460 | .d_delete = cgroup_delete, | ||
1454 | }; | 1461 | }; |
1455 | 1462 | ||
1456 | struct inode *inode = | 1463 | struct inode *inode = |
@@ -2195,12 +2202,20 @@ static const struct file_operations cgroup_file_operations = { | |||
2195 | }; | 2202 | }; |
2196 | 2203 | ||
2197 | static const struct inode_operations cgroup_dir_inode_operations = { | 2204 | static const struct inode_operations cgroup_dir_inode_operations = { |
2198 | .lookup = simple_lookup, | 2205 | .lookup = cgroup_lookup, |
2199 | .mkdir = cgroup_mkdir, | 2206 | .mkdir = cgroup_mkdir, |
2200 | .rmdir = cgroup_rmdir, | 2207 | .rmdir = cgroup_rmdir, |
2201 | .rename = cgroup_rename, | 2208 | .rename = cgroup_rename, |
2202 | }; | 2209 | }; |
2203 | 2210 | ||
2211 | static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | ||
2212 | { | ||
2213 | if (dentry->d_name.len > NAME_MAX) | ||
2214 | return ERR_PTR(-ENAMETOOLONG); | ||
2215 | d_add(dentry, NULL); | ||
2216 | return NULL; | ||
2217 | } | ||
2218 | |||
2204 | /* | 2219 | /* |
2205 | * Check if a file is a control file | 2220 | * Check if a file is a control file |
2206 | */ | 2221 | */ |
diff --git a/kernel/cred.c b/kernel/cred.c index 6a1aa004e376..3a9d6dd53a6c 100644 --- a/kernel/cred.c +++ b/kernel/cred.c | |||
@@ -252,13 +252,13 @@ struct cred *cred_alloc_blank(void) | |||
252 | #endif | 252 | #endif |
253 | 253 | ||
254 | atomic_set(&new->usage, 1); | 254 | atomic_set(&new->usage, 1); |
255 | #ifdef CONFIG_DEBUG_CREDENTIALS | ||
256 | new->magic = CRED_MAGIC; | ||
257 | #endif | ||
255 | 258 | ||
256 | if (security_cred_alloc_blank(new, GFP_KERNEL) < 0) | 259 | if (security_cred_alloc_blank(new, GFP_KERNEL) < 0) |
257 | goto error; | 260 | goto error; |
258 | 261 | ||
259 | #ifdef CONFIG_DEBUG_CREDENTIALS | ||
260 | new->magic = CRED_MAGIC; | ||
261 | #endif | ||
262 | return new; | 262 | return new; |
263 | 263 | ||
264 | error: | 264 | error: |
@@ -657,6 +657,8 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) | |||
657 | validate_creds(old); | 657 | validate_creds(old); |
658 | 658 | ||
659 | *new = *old; | 659 | *new = *old; |
660 | atomic_set(&new->usage, 1); | ||
661 | set_cred_subscribers(new, 0); | ||
660 | get_uid(new->user); | 662 | get_uid(new->user); |
661 | get_group_info(new->group_info); | 663 | get_group_info(new->group_info); |
662 | 664 | ||
@@ -674,8 +676,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) | |||
674 | if (security_prepare_creds(new, old, GFP_KERNEL) < 0) | 676 | if (security_prepare_creds(new, old, GFP_KERNEL) < 0) |
675 | goto error; | 677 | goto error; |
676 | 678 | ||
677 | atomic_set(&new->usage, 1); | ||
678 | set_cred_subscribers(new, 0); | ||
679 | put_cred(old); | 679 | put_cred(old); |
680 | validate_creds(new); | 680 | validate_creds(new); |
681 | return new; | 681 | return new; |
@@ -748,7 +748,11 @@ bool creds_are_invalid(const struct cred *cred) | |||
748 | if (cred->magic != CRED_MAGIC) | 748 | if (cred->magic != CRED_MAGIC) |
749 | return true; | 749 | return true; |
750 | #ifdef CONFIG_SECURITY_SELINUX | 750 | #ifdef CONFIG_SECURITY_SELINUX |
751 | if (selinux_is_enabled()) { | 751 | /* |
752 | * cred->security == NULL if security_cred_alloc_blank() or | ||
753 | * security_prepare_creds() returned an error. | ||
754 | */ | ||
755 | if (selinux_is_enabled() && cred->security) { | ||
752 | if ((unsigned long) cred->security < PAGE_SIZE) | 756 | if ((unsigned long) cred->security < PAGE_SIZE) |
753 | return true; | 757 | return true; |
754 | if ((*(u32 *)cred->security & 0xffffff00) == | 758 | if ((*(u32 *)cred->security & 0xffffff00) == |
diff --git a/kernel/fork.c b/kernel/fork.c index d9b44f20b6b0..25e429152ddc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -66,6 +66,7 @@ | |||
66 | #include <linux/posix-timers.h> | 66 | #include <linux/posix-timers.h> |
67 | #include <linux/user-return-notifier.h> | 67 | #include <linux/user-return-notifier.h> |
68 | #include <linux/oom.h> | 68 | #include <linux/oom.h> |
69 | #include <linux/khugepaged.h> | ||
69 | 70 | ||
70 | #include <asm/pgtable.h> | 71 | #include <asm/pgtable.h> |
71 | #include <asm/pgalloc.h> | 72 | #include <asm/pgalloc.h> |
@@ -330,6 +331,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) | |||
330 | retval = ksm_fork(mm, oldmm); | 331 | retval = ksm_fork(mm, oldmm); |
331 | if (retval) | 332 | if (retval) |
332 | goto out; | 333 | goto out; |
334 | retval = khugepaged_fork(mm, oldmm); | ||
335 | if (retval) | ||
336 | goto out; | ||
333 | 337 | ||
334 | prev = NULL; | 338 | prev = NULL; |
335 | for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { | 339 | for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) { |
@@ -529,6 +533,9 @@ void __mmdrop(struct mm_struct *mm) | |||
529 | mm_free_pgd(mm); | 533 | mm_free_pgd(mm); |
530 | destroy_context(mm); | 534 | destroy_context(mm); |
531 | mmu_notifier_mm_destroy(mm); | 535 | mmu_notifier_mm_destroy(mm); |
536 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
537 | VM_BUG_ON(mm->pmd_huge_pte); | ||
538 | #endif | ||
532 | free_mm(mm); | 539 | free_mm(mm); |
533 | } | 540 | } |
534 | EXPORT_SYMBOL_GPL(__mmdrop); | 541 | EXPORT_SYMBOL_GPL(__mmdrop); |
@@ -543,6 +550,7 @@ void mmput(struct mm_struct *mm) | |||
543 | if (atomic_dec_and_test(&mm->mm_users)) { | 550 | if (atomic_dec_and_test(&mm->mm_users)) { |
544 | exit_aio(mm); | 551 | exit_aio(mm); |
545 | ksm_exit(mm); | 552 | ksm_exit(mm); |
553 | khugepaged_exit(mm); /* must run before exit_mmap */ | ||
546 | exit_mmap(mm); | 554 | exit_mmap(mm); |
547 | set_mm_exe_file(mm, NULL); | 555 | set_mm_exe_file(mm, NULL); |
548 | if (!list_empty(&mm->mmlist)) { | 556 | if (!list_empty(&mm->mmlist)) { |
@@ -669,6 +677,10 @@ struct mm_struct *dup_mm(struct task_struct *tsk) | |||
669 | mm->token_priority = 0; | 677 | mm->token_priority = 0; |
670 | mm->last_interval = 0; | 678 | mm->last_interval = 0; |
671 | 679 | ||
680 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | ||
681 | mm->pmd_huge_pte = NULL; | ||
682 | #endif | ||
683 | |||
672 | if (!mm_init(mm, tsk)) | 684 | if (!mm_init(mm, tsk)) |
673 | goto fail_nomem; | 685 | goto fail_nomem; |
674 | 686 | ||
@@ -910,6 +922,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
910 | 922 | ||
911 | sig->oom_adj = current->signal->oom_adj; | 923 | sig->oom_adj = current->signal->oom_adj; |
912 | sig->oom_score_adj = current->signal->oom_score_adj; | 924 | sig->oom_score_adj = current->signal->oom_score_adj; |
925 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; | ||
913 | 926 | ||
914 | mutex_init(&sig->cred_guard_mutex); | 927 | mutex_init(&sig->cred_guard_mutex); |
915 | 928 | ||
@@ -1410,23 +1423,6 @@ long do_fork(unsigned long clone_flags, | |||
1410 | } | 1423 | } |
1411 | 1424 | ||
1412 | /* | 1425 | /* |
1413 | * We hope to recycle these flags after 2.6.26 | ||
1414 | */ | ||
1415 | if (unlikely(clone_flags & CLONE_STOPPED)) { | ||
1416 | static int __read_mostly count = 100; | ||
1417 | |||
1418 | if (count > 0 && printk_ratelimit()) { | ||
1419 | char comm[TASK_COMM_LEN]; | ||
1420 | |||
1421 | count--; | ||
1422 | printk(KERN_INFO "fork(): process `%s' used deprecated " | ||
1423 | "clone flags 0x%lx\n", | ||
1424 | get_task_comm(comm, current), | ||
1425 | clone_flags & CLONE_STOPPED); | ||
1426 | } | ||
1427 | } | ||
1428 | |||
1429 | /* | ||
1430 | * When called from kernel_thread, don't do user tracing stuff. | 1426 | * When called from kernel_thread, don't do user tracing stuff. |
1431 | */ | 1427 | */ |
1432 | if (likely(user_mode(regs))) | 1428 | if (likely(user_mode(regs))) |
@@ -1464,16 +1460,7 @@ long do_fork(unsigned long clone_flags, | |||
1464 | */ | 1460 | */ |
1465 | p->flags &= ~PF_STARTING; | 1461 | p->flags &= ~PF_STARTING; |
1466 | 1462 | ||
1467 | if (unlikely(clone_flags & CLONE_STOPPED)) { | 1463 | wake_up_new_task(p, clone_flags); |
1468 | /* | ||
1469 | * We'll start up with an immediate SIGSTOP. | ||
1470 | */ | ||
1471 | sigaddset(&p->pending.signal, SIGSTOP); | ||
1472 | set_tsk_thread_flag(p, TIF_SIGPENDING); | ||
1473 | __set_task_state(p, TASK_STOPPED); | ||
1474 | } else { | ||
1475 | wake_up_new_task(p, clone_flags); | ||
1476 | } | ||
1477 | 1464 | ||
1478 | tracehook_report_clone_complete(trace, regs, | 1465 | tracehook_report_clone_complete(trace, regs, |
1479 | clone_flags, nr, p); | 1466 | clone_flags, nr, p); |
diff --git a/kernel/futex.c b/kernel/futex.c index 3019b92e6917..b766d28accd6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -233,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) | |||
233 | { | 233 | { |
234 | unsigned long address = (unsigned long)uaddr; | 234 | unsigned long address = (unsigned long)uaddr; |
235 | struct mm_struct *mm = current->mm; | 235 | struct mm_struct *mm = current->mm; |
236 | struct page *page; | 236 | struct page *page, *page_head; |
237 | int err; | 237 | int err; |
238 | 238 | ||
239 | /* | 239 | /* |
@@ -265,11 +265,46 @@ again: | |||
265 | if (err < 0) | 265 | if (err < 0) |
266 | return err; | 266 | return err; |
267 | 267 | ||
268 | page = compound_head(page); | 268 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
269 | lock_page(page); | 269 | page_head = page; |
270 | if (!page->mapping) { | 270 | if (unlikely(PageTail(page))) { |
271 | unlock_page(page); | ||
272 | put_page(page); | 271 | put_page(page); |
272 | /* serialize against __split_huge_page_splitting() */ | ||
273 | local_irq_disable(); | ||
274 | if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) { | ||
275 | page_head = compound_head(page); | ||
276 | /* | ||
277 | * page_head is valid pointer but we must pin | ||
278 | * it before taking the PG_lock and/or | ||
279 | * PG_compound_lock. The moment we re-enable | ||
280 | * irqs __split_huge_page_splitting() can | ||
281 | * return and the head page can be freed from | ||
282 | * under us. We can't take the PG_lock and/or | ||
283 | * PG_compound_lock on a page that could be | ||
284 | * freed from under us. | ||
285 | */ | ||
286 | if (page != page_head) { | ||
287 | get_page(page_head); | ||
288 | put_page(page); | ||
289 | } | ||
290 | local_irq_enable(); | ||
291 | } else { | ||
292 | local_irq_enable(); | ||
293 | goto again; | ||
294 | } | ||
295 | } | ||
296 | #else | ||
297 | page_head = compound_head(page); | ||
298 | if (page != page_head) { | ||
299 | get_page(page_head); | ||
300 | put_page(page); | ||
301 | } | ||
302 | #endif | ||
303 | |||
304 | lock_page(page_head); | ||
305 | if (!page_head->mapping) { | ||
306 | unlock_page(page_head); | ||
307 | put_page(page_head); | ||
273 | goto again; | 308 | goto again; |
274 | } | 309 | } |
275 | 310 | ||
@@ -280,20 +315,20 @@ again: | |||
280 | * it's a read-only handle, it's expected that futexes attach to | 315 | * it's a read-only handle, it's expected that futexes attach to |
281 | * the object not the particular process. | 316 | * the object not the particular process. |
282 | */ | 317 | */ |
283 | if (PageAnon(page)) { | 318 | if (PageAnon(page_head)) { |
284 | key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ | 319 | key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ |
285 | key->private.mm = mm; | 320 | key->private.mm = mm; |
286 | key->private.address = address; | 321 | key->private.address = address; |
287 | } else { | 322 | } else { |
288 | key->both.offset |= FUT_OFF_INODE; /* inode-based key */ | 323 | key->both.offset |= FUT_OFF_INODE; /* inode-based key */ |
289 | key->shared.inode = page->mapping->host; | 324 | key->shared.inode = page_head->mapping->host; |
290 | key->shared.pgoff = page->index; | 325 | key->shared.pgoff = page_head->index; |
291 | } | 326 | } |
292 | 327 | ||
293 | get_futex_key_refs(key); | 328 | get_futex_key_refs(key); |
294 | 329 | ||
295 | unlock_page(page); | 330 | unlock_page(page_head); |
296 | put_page(page); | 331 | put_page(page_head); |
297 | return 0; | 332 | return 0; |
298 | } | 333 | } |
299 | 334 | ||
@@ -791,10 +826,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) | |||
791 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); | 826 | new_owner = rt_mutex_next_owner(&pi_state->pi_mutex); |
792 | 827 | ||
793 | /* | 828 | /* |
794 | * This happens when we have stolen the lock and the original | 829 | * It is possible that the next waiter (the one that brought |
795 | * pending owner did not enqueue itself back on the rt_mutex. | 830 | * this owner to the kernel) timed out and is no longer |
796 | * Thats not a tragedy. We know that way, that a lock waiter | 831 | * waiting on the lock. |
797 | * is on the fly. We make the futex_q waiter the pending owner. | ||
798 | */ | 832 | */ |
799 | if (!new_owner) | 833 | if (!new_owner) |
800 | new_owner = this->task; | 834 | new_owner = this->task; |
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 31d766bf5d2e..8e42fec7686d 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig | |||
@@ -9,9 +9,6 @@ menu "IRQ subsystem" | |||
9 | config GENERIC_HARDIRQS | 9 | config GENERIC_HARDIRQS |
10 | def_bool y | 10 | def_bool y |
11 | 11 | ||
12 | config GENERIC_HARDIRQS_NO__DO_IRQ | ||
13 | def_bool y | ||
14 | |||
15 | # Select this to disable the deprecated stuff | 12 | # Select this to disable the deprecated stuff |
16 | config GENERIC_HARDIRQS_NO_DEPRECATED | 13 | config GENERIC_HARDIRQS_NO_DEPRECATED |
17 | def_bool n | 14 | def_bool n |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index e2347eb63306..3540a7190122 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -118,114 +118,3 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) | |||
118 | 118 | ||
119 | return retval; | 119 | return retval; |
120 | } | 120 | } |
121 | |||
122 | #ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ | ||
123 | |||
124 | #ifdef CONFIG_ENABLE_WARN_DEPRECATED | ||
125 | # warning __do_IRQ is deprecated. Please convert to proper flow handlers | ||
126 | #endif | ||
127 | |||
128 | /** | ||
129 | * __do_IRQ - original all in one highlevel IRQ handler | ||
130 | * @irq: the interrupt number | ||
131 | * | ||
132 | * __do_IRQ handles all normal device IRQ's (the special | ||
133 | * SMP cross-CPU interrupts have their own specific | ||
134 | * handlers). | ||
135 | * | ||
136 | * This is the original x86 implementation which is used for every | ||
137 | * interrupt type. | ||
138 | */ | ||
139 | unsigned int __do_IRQ(unsigned int irq) | ||
140 | { | ||
141 | struct irq_desc *desc = irq_to_desc(irq); | ||
142 | struct irqaction *action; | ||
143 | unsigned int status; | ||
144 | |||
145 | kstat_incr_irqs_this_cpu(irq, desc); | ||
146 | |||
147 | if (CHECK_IRQ_PER_CPU(desc->status)) { | ||
148 | irqreturn_t action_ret; | ||
149 | |||
150 | /* | ||
151 | * No locking required for CPU-local interrupts: | ||
152 | */ | ||
153 | if (desc->irq_data.chip->ack) | ||
154 | desc->irq_data.chip->ack(irq); | ||
155 | if (likely(!(desc->status & IRQ_DISABLED))) { | ||
156 | action_ret = handle_IRQ_event(irq, desc->action); | ||
157 | if (!noirqdebug) | ||
158 | note_interrupt(irq, desc, action_ret); | ||
159 | } | ||
160 | desc->irq_data.chip->end(irq); | ||
161 | return 1; | ||
162 | } | ||
163 | |||
164 | raw_spin_lock(&desc->lock); | ||
165 | if (desc->irq_data.chip->ack) | ||
166 | desc->irq_data.chip->ack(irq); | ||
167 | /* | ||
168 | * REPLAY is when Linux resends an IRQ that was dropped earlier | ||
169 | * WAITING is used by probe to mark irqs that are being tested | ||
170 | */ | ||
171 | status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); | ||
172 | status |= IRQ_PENDING; /* we _want_ to handle it */ | ||
173 | |||
174 | /* | ||
175 | * If the IRQ is disabled for whatever reason, we cannot | ||
176 | * use the action we have. | ||
177 | */ | ||
178 | action = NULL; | ||
179 | if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { | ||
180 | action = desc->action; | ||
181 | status &= ~IRQ_PENDING; /* we commit to handling */ | ||
182 | status |= IRQ_INPROGRESS; /* we are handling it */ | ||
183 | } | ||
184 | desc->status = status; | ||
185 | |||
186 | /* | ||
187 | * If there is no IRQ handler or it was disabled, exit early. | ||
188 | * Since we set PENDING, if another processor is handling | ||
189 | * a different instance of this same irq, the other processor | ||
190 | * will take care of it. | ||
191 | */ | ||
192 | if (unlikely(!action)) | ||
193 | goto out; | ||
194 | |||
195 | /* | ||
196 | * Edge triggered interrupts need to remember | ||
197 | * pending events. | ||
198 | * This applies to any hw interrupts that allow a second | ||
199 | * instance of the same irq to arrive while we are in do_IRQ | ||
200 | * or in the handler. But the code here only handles the _second_ | ||
201 | * instance of the irq, not the third or fourth. So it is mostly | ||
202 | * useful for irq hardware that does not mask cleanly in an | ||
203 | * SMP environment. | ||
204 | */ | ||
205 | for (;;) { | ||
206 | irqreturn_t action_ret; | ||
207 | |||
208 | raw_spin_unlock(&desc->lock); | ||
209 | |||
210 | action_ret = handle_IRQ_event(irq, action); | ||
211 | if (!noirqdebug) | ||
212 | note_interrupt(irq, desc, action_ret); | ||
213 | |||
214 | raw_spin_lock(&desc->lock); | ||
215 | if (likely(!(desc->status & IRQ_PENDING))) | ||
216 | break; | ||
217 | desc->status &= ~IRQ_PENDING; | ||
218 | } | ||
219 | desc->status &= ~IRQ_INPROGRESS; | ||
220 | |||
221 | out: | ||
222 | /* | ||
223 | * The ->end() handler has to deal with interrupts which got | ||
224 | * disabled while the handler was running. | ||
225 | */ | ||
226 | desc->irq_data.chip->end(irq); | ||
227 | raw_spin_unlock(&desc->lock); | ||
228 | |||
229 | return 1; | ||
230 | } | ||
231 | #endif | ||
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9988d03797f5..282f20230e67 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c | |||
@@ -72,6 +72,8 @@ static inline int desc_node(struct irq_desc *desc) { return 0; } | |||
72 | 72 | ||
73 | static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | 73 | static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) |
74 | { | 74 | { |
75 | int cpu; | ||
76 | |||
75 | desc->irq_data.irq = irq; | 77 | desc->irq_data.irq = irq; |
76 | desc->irq_data.chip = &no_irq_chip; | 78 | desc->irq_data.chip = &no_irq_chip; |
77 | desc->irq_data.chip_data = NULL; | 79 | desc->irq_data.chip_data = NULL; |
@@ -83,7 +85,8 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) | |||
83 | desc->irq_count = 0; | 85 | desc->irq_count = 0; |
84 | desc->irqs_unhandled = 0; | 86 | desc->irqs_unhandled = 0; |
85 | desc->name = NULL; | 87 | desc->name = NULL; |
86 | memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); | 88 | for_each_possible_cpu(cpu) |
89 | *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; | ||
87 | desc_smp_init(desc, node); | 90 | desc_smp_init(desc, node); |
88 | } | 91 | } |
89 | 92 | ||
@@ -133,8 +136,7 @@ static struct irq_desc *alloc_desc(int irq, int node) | |||
133 | if (!desc) | 136 | if (!desc) |
134 | return NULL; | 137 | return NULL; |
135 | /* allocate based on nr_cpu_ids */ | 138 | /* allocate based on nr_cpu_ids */ |
136 | desc->kstat_irqs = kzalloc_node(nr_cpu_ids * sizeof(*desc->kstat_irqs), | 139 | desc->kstat_irqs = alloc_percpu(unsigned int); |
137 | gfp, node); | ||
138 | if (!desc->kstat_irqs) | 140 | if (!desc->kstat_irqs) |
139 | goto err_desc; | 141 | goto err_desc; |
140 | 142 | ||
@@ -149,7 +151,7 @@ static struct irq_desc *alloc_desc(int irq, int node) | |||
149 | return desc; | 151 | return desc; |
150 | 152 | ||
151 | err_kstat: | 153 | err_kstat: |
152 | kfree(desc->kstat_irqs); | 154 | free_percpu(desc->kstat_irqs); |
153 | err_desc: | 155 | err_desc: |
154 | kfree(desc); | 156 | kfree(desc); |
155 | return NULL; | 157 | return NULL; |
@@ -166,7 +168,7 @@ static void free_desc(unsigned int irq) | |||
166 | mutex_unlock(&sparse_irq_lock); | 168 | mutex_unlock(&sparse_irq_lock); |
167 | 169 | ||
168 | free_masks(desc); | 170 | free_masks(desc); |
169 | kfree(desc->kstat_irqs); | 171 | free_percpu(desc->kstat_irqs); |
170 | kfree(desc); | 172 | kfree(desc); |
171 | } | 173 | } |
172 | 174 | ||
@@ -234,7 +236,6 @@ struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = { | |||
234 | } | 236 | } |
235 | }; | 237 | }; |
236 | 238 | ||
237 | static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS]; | ||
238 | int __init early_irq_init(void) | 239 | int __init early_irq_init(void) |
239 | { | 240 | { |
240 | int count, i, node = first_online_node; | 241 | int count, i, node = first_online_node; |
@@ -250,7 +251,8 @@ int __init early_irq_init(void) | |||
250 | for (i = 0; i < count; i++) { | 251 | for (i = 0; i < count; i++) { |
251 | desc[i].irq_data.irq = i; | 252 | desc[i].irq_data.irq = i; |
252 | desc[i].irq_data.chip = &no_irq_chip; | 253 | desc[i].irq_data.chip = &no_irq_chip; |
253 | desc[i].kstat_irqs = kstat_irqs_all[i]; | 254 | /* TODO : do this allocation on-demand ... */ |
255 | desc[i].kstat_irqs = alloc_percpu(unsigned int); | ||
254 | alloc_masks(desc + i, GFP_KERNEL, node); | 256 | alloc_masks(desc + i, GFP_KERNEL, node); |
255 | desc_smp_init(desc + i, node); | 257 | desc_smp_init(desc + i, node); |
256 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | 258 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); |
@@ -275,6 +277,22 @@ static void free_desc(unsigned int irq) | |||
275 | 277 | ||
276 | static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) | 278 | static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) |
277 | { | 279 | { |
280 | #if defined(CONFIG_KSTAT_IRQS_ONDEMAND) | ||
281 | struct irq_desc *desc; | ||
282 | unsigned int i; | ||
283 | |||
284 | for (i = 0; i < cnt; i++) { | ||
285 | desc = irq_to_desc(start + i); | ||
286 | if (desc && !desc->kstat_irqs) { | ||
287 | unsigned int __percpu *stats = alloc_percpu(unsigned int); | ||
288 | |||
289 | if (!stats) | ||
290 | return -1; | ||
291 | if (cmpxchg(&desc->kstat_irqs, NULL, stats) != NULL) | ||
292 | free_percpu(stats); | ||
293 | } | ||
294 | } | ||
295 | #endif | ||
278 | return start; | 296 | return start; |
279 | } | 297 | } |
280 | #endif /* !CONFIG_SPARSE_IRQ */ | 298 | #endif /* !CONFIG_SPARSE_IRQ */ |
@@ -391,7 +409,9 @@ void dynamic_irq_cleanup(unsigned int irq) | |||
391 | unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) | 409 | unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) |
392 | { | 410 | { |
393 | struct irq_desc *desc = irq_to_desc(irq); | 411 | struct irq_desc *desc = irq_to_desc(irq); |
394 | return desc ? desc->kstat_irqs[cpu] : 0; | 412 | |
413 | return desc && desc->kstat_irqs ? | ||
414 | *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; | ||
395 | } | 415 | } |
396 | 416 | ||
397 | #ifdef CONFIG_GENERIC_HARDIRQS | 417 | #ifdef CONFIG_GENERIC_HARDIRQS |
@@ -401,10 +421,10 @@ unsigned int kstat_irqs(unsigned int irq) | |||
401 | int cpu; | 421 | int cpu; |
402 | int sum = 0; | 422 | int sum = 0; |
403 | 423 | ||
404 | if (!desc) | 424 | if (!desc || !desc->kstat_irqs) |
405 | return 0; | 425 | return 0; |
406 | for_each_possible_cpu(cpu) | 426 | for_each_possible_cpu(cpu) |
407 | sum += desc->kstat_irqs[cpu]; | 427 | sum += *per_cpu_ptr(desc->kstat_irqs, cpu); |
408 | return sum; | 428 | return sum; |
409 | } | 429 | } |
410 | #endif /* CONFIG_GENERIC_HARDIRQS */ | 430 | #endif /* CONFIG_GENERIC_HARDIRQS */ |
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index 1d2541940480..441fd629ff04 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c | |||
@@ -56,6 +56,7 @@ void move_masked_irq(int irq) | |||
56 | void move_native_irq(int irq) | 56 | void move_native_irq(int irq) |
57 | { | 57 | { |
58 | struct irq_desc *desc = irq_to_desc(irq); | 58 | struct irq_desc *desc = irq_to_desc(irq); |
59 | bool masked; | ||
59 | 60 | ||
60 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) | 61 | if (likely(!(desc->status & IRQ_MOVE_PENDING))) |
61 | return; | 62 | return; |
@@ -63,8 +64,15 @@ void move_native_irq(int irq) | |||
63 | if (unlikely(desc->status & IRQ_DISABLED)) | 64 | if (unlikely(desc->status & IRQ_DISABLED)) |
64 | return; | 65 | return; |
65 | 66 | ||
66 | desc->irq_data.chip->irq_mask(&desc->irq_data); | 67 | /* |
68 | * Be careful vs. already masked interrupts. If this is a | ||
69 | * threaded interrupt with ONESHOT set, we can end up with an | ||
70 | * interrupt storm. | ||
71 | */ | ||
72 | masked = desc->status & IRQ_MASKED; | ||
73 | if (!masked) | ||
74 | desc->irq_data.chip->irq_mask(&desc->irq_data); | ||
67 | move_masked_irq(irq); | 75 | move_masked_irq(irq); |
68 | desc->irq_data.chip->irq_unmask(&desc->irq_data); | 76 | if (!masked) |
77 | desc->irq_data.chip->irq_unmask(&desc->irq_data); | ||
69 | } | 78 | } |
70 | |||
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 42ba65dff7d9..0d2058da80f5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -2292,22 +2292,6 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) | |||
2292 | } | 2292 | } |
2293 | 2293 | ||
2294 | /* | 2294 | /* |
2295 | * Debugging helper: via this flag we know that we are in | ||
2296 | * 'early bootup code', and will warn about any invalid irqs-on event: | ||
2297 | */ | ||
2298 | static int early_boot_irqs_enabled; | ||
2299 | |||
2300 | void early_boot_irqs_off(void) | ||
2301 | { | ||
2302 | early_boot_irqs_enabled = 0; | ||
2303 | } | ||
2304 | |||
2305 | void early_boot_irqs_on(void) | ||
2306 | { | ||
2307 | early_boot_irqs_enabled = 1; | ||
2308 | } | ||
2309 | |||
2310 | /* | ||
2311 | * Hardirqs will be enabled: | 2295 | * Hardirqs will be enabled: |
2312 | */ | 2296 | */ |
2313 | void trace_hardirqs_on_caller(unsigned long ip) | 2297 | void trace_hardirqs_on_caller(unsigned long ip) |
@@ -2319,7 +2303,7 @@ void trace_hardirqs_on_caller(unsigned long ip) | |||
2319 | if (unlikely(!debug_locks || current->lockdep_recursion)) | 2303 | if (unlikely(!debug_locks || current->lockdep_recursion)) |
2320 | return; | 2304 | return; |
2321 | 2305 | ||
2322 | if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled))) | 2306 | if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) |
2323 | return; | 2307 | return; |
2324 | 2308 | ||
2325 | if (unlikely(curr->hardirqs_enabled)) { | 2309 | if (unlikely(curr->hardirqs_enabled)) { |
diff --git a/kernel/module.c b/kernel/module.c index 34e00b708fad..efa290ea94bf 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -2460,9 +2460,9 @@ static void find_module_sections(struct module *mod, struct load_info *info) | |||
2460 | #endif | 2460 | #endif |
2461 | 2461 | ||
2462 | #ifdef CONFIG_TRACEPOINTS | 2462 | #ifdef CONFIG_TRACEPOINTS |
2463 | mod->tracepoints = section_objs(info, "__tracepoints", | 2463 | mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs", |
2464 | sizeof(*mod->tracepoints), | 2464 | sizeof(*mod->tracepoints_ptrs), |
2465 | &mod->num_tracepoints); | 2465 | &mod->num_tracepoints); |
2466 | #endif | 2466 | #endif |
2467 | #ifdef HAVE_JUMP_LABEL | 2467 | #ifdef HAVE_JUMP_LABEL |
2468 | mod->jump_entries = section_objs(info, "__jump_table", | 2468 | mod->jump_entries = section_objs(info, "__jump_table", |
@@ -3393,7 +3393,7 @@ void module_layout(struct module *mod, | |||
3393 | struct modversion_info *ver, | 3393 | struct modversion_info *ver, |
3394 | struct kernel_param *kp, | 3394 | struct kernel_param *kp, |
3395 | struct kernel_symbol *ks, | 3395 | struct kernel_symbol *ks, |
3396 | struct tracepoint *tp) | 3396 | struct tracepoint * const *tp) |
3397 | { | 3397 | { |
3398 | } | 3398 | } |
3399 | EXPORT_SYMBOL(module_layout); | 3399 | EXPORT_SYMBOL(module_layout); |
@@ -3407,8 +3407,8 @@ void module_update_tracepoints(void) | |||
3407 | mutex_lock(&module_mutex); | 3407 | mutex_lock(&module_mutex); |
3408 | list_for_each_entry(mod, &modules, list) | 3408 | list_for_each_entry(mod, &modules, list) |
3409 | if (!mod->taints) | 3409 | if (!mod->taints) |
3410 | tracepoint_update_probe_range(mod->tracepoints, | 3410 | tracepoint_update_probe_range(mod->tracepoints_ptrs, |
3411 | mod->tracepoints + mod->num_tracepoints); | 3411 | mod->tracepoints_ptrs + mod->num_tracepoints); |
3412 | mutex_unlock(&module_mutex); | 3412 | mutex_unlock(&module_mutex); |
3413 | } | 3413 | } |
3414 | 3414 | ||
@@ -3432,8 +3432,8 @@ int module_get_iter_tracepoints(struct tracepoint_iter *iter) | |||
3432 | else if (iter_mod > iter->module) | 3432 | else if (iter_mod > iter->module) |
3433 | iter->tracepoint = NULL; | 3433 | iter->tracepoint = NULL; |
3434 | found = tracepoint_get_iter_range(&iter->tracepoint, | 3434 | found = tracepoint_get_iter_range(&iter->tracepoint, |
3435 | iter_mod->tracepoints, | 3435 | iter_mod->tracepoints_ptrs, |
3436 | iter_mod->tracepoints | 3436 | iter_mod->tracepoints_ptrs |
3437 | + iter_mod->num_tracepoints); | 3437 | + iter_mod->num_tracepoints); |
3438 | if (found) { | 3438 | if (found) { |
3439 | iter->module = iter_mod; | 3439 | iter->module = iter_mod; |
diff --git a/kernel/panic.c b/kernel/panic.c index 4c13b1a88ebb..991bb87a1704 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -34,6 +34,7 @@ static int pause_on_oops_flag; | |||
34 | static DEFINE_SPINLOCK(pause_on_oops_lock); | 34 | static DEFINE_SPINLOCK(pause_on_oops_lock); |
35 | 35 | ||
36 | int panic_timeout; | 36 | int panic_timeout; |
37 | EXPORT_SYMBOL_GPL(panic_timeout); | ||
37 | 38 | ||
38 | ATOMIC_NOTIFIER_HEAD(panic_notifier_list); | 39 | ATOMIC_NOTIFIER_HEAD(panic_notifier_list); |
39 | 40 | ||
diff --git a/kernel/params.c b/kernel/params.c index 08107d181758..0da1411222b9 100644 --- a/kernel/params.c +++ b/kernel/params.c | |||
@@ -719,9 +719,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) | |||
719 | params[i].ops->free(params[i].arg); | 719 | params[i].ops->free(params[i].arg); |
720 | } | 720 | } |
721 | 721 | ||
722 | static void __init kernel_add_sysfs_param(const char *name, | 722 | static struct module_kobject * __init locate_module_kobject(const char *name) |
723 | struct kernel_param *kparam, | ||
724 | unsigned int name_skip) | ||
725 | { | 723 | { |
726 | struct module_kobject *mk; | 724 | struct module_kobject *mk; |
727 | struct kobject *kobj; | 725 | struct kobject *kobj; |
@@ -729,10 +727,7 @@ static void __init kernel_add_sysfs_param(const char *name, | |||
729 | 727 | ||
730 | kobj = kset_find_obj(module_kset, name); | 728 | kobj = kset_find_obj(module_kset, name); |
731 | if (kobj) { | 729 | if (kobj) { |
732 | /* We already have one. Remove params so we can add more. */ | ||
733 | mk = to_module_kobject(kobj); | 730 | mk = to_module_kobject(kobj); |
734 | /* We need to remove it before adding parameters. */ | ||
735 | sysfs_remove_group(&mk->kobj, &mk->mp->grp); | ||
736 | } else { | 731 | } else { |
737 | mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); | 732 | mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); |
738 | BUG_ON(!mk); | 733 | BUG_ON(!mk); |
@@ -743,15 +738,36 @@ static void __init kernel_add_sysfs_param(const char *name, | |||
743 | "%s", name); | 738 | "%s", name); |
744 | if (err) { | 739 | if (err) { |
745 | kobject_put(&mk->kobj); | 740 | kobject_put(&mk->kobj); |
746 | printk(KERN_ERR "Module '%s' failed add to sysfs, " | 741 | printk(KERN_ERR |
747 | "error number %d\n", name, err); | 742 | "Module '%s' failed add to sysfs, error number %d\n", |
748 | printk(KERN_ERR "The system will be unstable now.\n"); | 743 | name, err); |
749 | return; | 744 | printk(KERN_ERR |
745 | "The system will be unstable now.\n"); | ||
746 | return NULL; | ||
750 | } | 747 | } |
751 | /* So that exit path is even. */ | 748 | |
749 | /* So that we hold reference in both cases. */ | ||
752 | kobject_get(&mk->kobj); | 750 | kobject_get(&mk->kobj); |
753 | } | 751 | } |
754 | 752 | ||
753 | return mk; | ||
754 | } | ||
755 | |||
756 | static void __init kernel_add_sysfs_param(const char *name, | ||
757 | struct kernel_param *kparam, | ||
758 | unsigned int name_skip) | ||
759 | { | ||
760 | struct module_kobject *mk; | ||
761 | int err; | ||
762 | |||
763 | mk = locate_module_kobject(name); | ||
764 | if (!mk) | ||
765 | return; | ||
766 | |||
767 | /* We need to remove old parameters before adding more. */ | ||
768 | if (mk->mp) | ||
769 | sysfs_remove_group(&mk->kobj, &mk->mp->grp); | ||
770 | |||
755 | /* These should not fail at boot. */ | 771 | /* These should not fail at boot. */ |
756 | err = add_sysfs_param(mk, kparam, kparam->name + name_skip); | 772 | err = add_sysfs_param(mk, kparam, kparam->name + name_skip); |
757 | BUG_ON(err); | 773 | BUG_ON(err); |
@@ -796,6 +812,32 @@ static void __init param_sysfs_builtin(void) | |||
796 | } | 812 | } |
797 | } | 813 | } |
798 | 814 | ||
815 | ssize_t __modver_version_show(struct module_attribute *mattr, | ||
816 | struct module *mod, char *buf) | ||
817 | { | ||
818 | struct module_version_attribute *vattr = | ||
819 | container_of(mattr, struct module_version_attribute, mattr); | ||
820 | |||
821 | return sprintf(buf, "%s\n", vattr->version); | ||
822 | } | ||
823 | |||
824 | extern struct module_version_attribute __start___modver[], __stop___modver[]; | ||
825 | |||
826 | static void __init version_sysfs_builtin(void) | ||
827 | { | ||
828 | const struct module_version_attribute *vattr; | ||
829 | struct module_kobject *mk; | ||
830 | int err; | ||
831 | |||
832 | for (vattr = __start___modver; vattr < __stop___modver; vattr++) { | ||
833 | mk = locate_module_kobject(vattr->module_name); | ||
834 | if (mk) { | ||
835 | err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); | ||
836 | kobject_uevent(&mk->kobj, KOBJ_ADD); | ||
837 | kobject_put(&mk->kobj); | ||
838 | } | ||
839 | } | ||
840 | } | ||
799 | 841 | ||
800 | /* module-related sysfs stuff */ | 842 | /* module-related sysfs stuff */ |
801 | 843 | ||
@@ -875,6 +917,7 @@ static int __init param_sysfs_init(void) | |||
875 | } | 917 | } |
876 | module_sysfs_initialized = 1; | 918 | module_sysfs_initialized = 1; |
877 | 919 | ||
920 | version_sysfs_builtin(); | ||
878 | param_sysfs_builtin(); | 921 | param_sysfs_builtin(); |
879 | 922 | ||
880 | return 0; | 923 | return 0; |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 05ebe841270b..999835b6112b 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -1901,11 +1901,12 @@ static void __perf_event_read(void *info) | |||
1901 | return; | 1901 | return; |
1902 | 1902 | ||
1903 | raw_spin_lock(&ctx->lock); | 1903 | raw_spin_lock(&ctx->lock); |
1904 | update_context_time(ctx); | 1904 | if (ctx->is_active) |
1905 | update_context_time(ctx); | ||
1905 | update_event_times(event); | 1906 | update_event_times(event); |
1907 | if (event->state == PERF_EVENT_STATE_ACTIVE) | ||
1908 | event->pmu->read(event); | ||
1906 | raw_spin_unlock(&ctx->lock); | 1909 | raw_spin_unlock(&ctx->lock); |
1907 | |||
1908 | event->pmu->read(event); | ||
1909 | } | 1910 | } |
1910 | 1911 | ||
1911 | static inline u64 perf_event_count(struct perf_event *event) | 1912 | static inline u64 perf_event_count(struct perf_event *event) |
@@ -1999,8 +2000,7 @@ static int alloc_callchain_buffers(void) | |||
1999 | * accessed from NMI. Use a temporary manual per cpu allocation | 2000 | * accessed from NMI. Use a temporary manual per cpu allocation |
2000 | * until that gets sorted out. | 2001 | * until that gets sorted out. |
2001 | */ | 2002 | */ |
2002 | size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * | 2003 | size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]); |
2003 | num_possible_cpus(); | ||
2004 | 2004 | ||
2005 | entries = kzalloc(size, GFP_KERNEL); | 2005 | entries = kzalloc(size, GFP_KERNEL); |
2006 | if (!entries) | 2006 | if (!entries) |
@@ -2201,13 +2201,6 @@ find_lively_task_by_vpid(pid_t vpid) | |||
2201 | if (!task) | 2201 | if (!task) |
2202 | return ERR_PTR(-ESRCH); | 2202 | return ERR_PTR(-ESRCH); |
2203 | 2203 | ||
2204 | /* | ||
2205 | * Can't attach events to a dying task. | ||
2206 | */ | ||
2207 | err = -ESRCH; | ||
2208 | if (task->flags & PF_EXITING) | ||
2209 | goto errout; | ||
2210 | |||
2211 | /* Reuse ptrace permission checks for now. */ | 2204 | /* Reuse ptrace permission checks for now. */ |
2212 | err = -EACCES; | 2205 | err = -EACCES; |
2213 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | 2206 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
@@ -2228,14 +2221,11 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) | |||
2228 | unsigned long flags; | 2221 | unsigned long flags; |
2229 | int ctxn, err; | 2222 | int ctxn, err; |
2230 | 2223 | ||
2231 | if (!task && cpu != -1) { | 2224 | if (!task) { |
2232 | /* Must be root to operate on a CPU event: */ | 2225 | /* Must be root to operate on a CPU event: */ |
2233 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | 2226 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) |
2234 | return ERR_PTR(-EACCES); | 2227 | return ERR_PTR(-EACCES); |
2235 | 2228 | ||
2236 | if (cpu < 0 || cpu >= nr_cpumask_bits) | ||
2237 | return ERR_PTR(-EINVAL); | ||
2238 | |||
2239 | /* | 2229 | /* |
2240 | * We could be clever and allow to attach a event to an | 2230 | * We could be clever and allow to attach a event to an |
2241 | * offline CPU and activate it when the CPU comes up, but | 2231 | * offline CPU and activate it when the CPU comes up, but |
@@ -2271,14 +2261,27 @@ retry: | |||
2271 | 2261 | ||
2272 | get_ctx(ctx); | 2262 | get_ctx(ctx); |
2273 | 2263 | ||
2274 | if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { | 2264 | err = 0; |
2275 | /* | 2265 | mutex_lock(&task->perf_event_mutex); |
2276 | * We raced with some other task; use | 2266 | /* |
2277 | * the context they set. | 2267 | * If it has already passed perf_event_exit_task(). |
2278 | */ | 2268 | * we must see PF_EXITING, it takes this mutex too. |
2269 | */ | ||
2270 | if (task->flags & PF_EXITING) | ||
2271 | err = -ESRCH; | ||
2272 | else if (task->perf_event_ctxp[ctxn]) | ||
2273 | err = -EAGAIN; | ||
2274 | else | ||
2275 | rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); | ||
2276 | mutex_unlock(&task->perf_event_mutex); | ||
2277 | |||
2278 | if (unlikely(err)) { | ||
2279 | put_task_struct(task); | 2279 | put_task_struct(task); |
2280 | kfree(ctx); | 2280 | kfree(ctx); |
2281 | goto retry; | 2281 | |
2282 | if (err == -EAGAIN) | ||
2283 | goto retry; | ||
2284 | goto errout; | ||
2282 | } | 2285 | } |
2283 | } | 2286 | } |
2284 | 2287 | ||
@@ -5377,6 +5380,8 @@ free_dev: | |||
5377 | goto out; | 5380 | goto out; |
5378 | } | 5381 | } |
5379 | 5382 | ||
5383 | static struct lock_class_key cpuctx_mutex; | ||
5384 | |||
5380 | int perf_pmu_register(struct pmu *pmu, char *name, int type) | 5385 | int perf_pmu_register(struct pmu *pmu, char *name, int type) |
5381 | { | 5386 | { |
5382 | int cpu, ret; | 5387 | int cpu, ret; |
@@ -5425,6 +5430,7 @@ skip_type: | |||
5425 | 5430 | ||
5426 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); | 5431 | cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); |
5427 | __perf_event_init_context(&cpuctx->ctx); | 5432 | __perf_event_init_context(&cpuctx->ctx); |
5433 | lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); | ||
5428 | cpuctx->ctx.type = cpu_context; | 5434 | cpuctx->ctx.type = cpu_context; |
5429 | cpuctx->ctx.pmu = pmu; | 5435 | cpuctx->ctx.pmu = pmu; |
5430 | cpuctx->jiffies_interval = 1; | 5436 | cpuctx->jiffies_interval = 1; |
@@ -5541,6 +5547,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
5541 | struct hw_perf_event *hwc; | 5547 | struct hw_perf_event *hwc; |
5542 | long err; | 5548 | long err; |
5543 | 5549 | ||
5550 | if ((unsigned)cpu >= nr_cpu_ids) { | ||
5551 | if (!task || cpu != -1) | ||
5552 | return ERR_PTR(-EINVAL); | ||
5553 | } | ||
5554 | |||
5544 | event = kzalloc(sizeof(*event), GFP_KERNEL); | 5555 | event = kzalloc(sizeof(*event), GFP_KERNEL); |
5545 | if (!event) | 5556 | if (!event) |
5546 | return ERR_PTR(-ENOMEM); | 5557 | return ERR_PTR(-ENOMEM); |
@@ -5589,7 +5600,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, | |||
5589 | 5600 | ||
5590 | if (!overflow_handler && parent_event) | 5601 | if (!overflow_handler && parent_event) |
5591 | overflow_handler = parent_event->overflow_handler; | 5602 | overflow_handler = parent_event->overflow_handler; |
5592 | 5603 | ||
5593 | event->overflow_handler = overflow_handler; | 5604 | event->overflow_handler = overflow_handler; |
5594 | 5605 | ||
5595 | if (attr->disabled) | 5606 | if (attr->disabled) |
@@ -6125,7 +6136,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) | |||
6125 | * scheduled, so we are now safe from rescheduling changing | 6136 | * scheduled, so we are now safe from rescheduling changing |
6126 | * our context. | 6137 | * our context. |
6127 | */ | 6138 | */ |
6128 | child_ctx = child->perf_event_ctxp[ctxn]; | 6139 | child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); |
6129 | task_ctx_sched_out(child_ctx, EVENT_ALL); | 6140 | task_ctx_sched_out(child_ctx, EVENT_ALL); |
6130 | 6141 | ||
6131 | /* | 6142 | /* |
@@ -6438,11 +6449,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6438 | unsigned long flags; | 6449 | unsigned long flags; |
6439 | int ret = 0; | 6450 | int ret = 0; |
6440 | 6451 | ||
6441 | child->perf_event_ctxp[ctxn] = NULL; | ||
6442 | |||
6443 | mutex_init(&child->perf_event_mutex); | ||
6444 | INIT_LIST_HEAD(&child->perf_event_list); | ||
6445 | |||
6446 | if (likely(!parent->perf_event_ctxp[ctxn])) | 6452 | if (likely(!parent->perf_event_ctxp[ctxn])) |
6447 | return 0; | 6453 | return 0; |
6448 | 6454 | ||
@@ -6494,7 +6500,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6494 | 6500 | ||
6495 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); | 6501 | raw_spin_lock_irqsave(&parent_ctx->lock, flags); |
6496 | parent_ctx->rotate_disable = 0; | 6502 | parent_ctx->rotate_disable = 0; |
6497 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6498 | 6503 | ||
6499 | child_ctx = child->perf_event_ctxp[ctxn]; | 6504 | child_ctx = child->perf_event_ctxp[ctxn]; |
6500 | 6505 | ||
@@ -6502,12 +6507,11 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6502 | /* | 6507 | /* |
6503 | * Mark the child context as a clone of the parent | 6508 | * Mark the child context as a clone of the parent |
6504 | * context, or of whatever the parent is a clone of. | 6509 | * context, or of whatever the parent is a clone of. |
6505 | * Note that if the parent is a clone, it could get | 6510 | * |
6506 | * uncloned at any point, but that doesn't matter | 6511 | * Note that if the parent is a clone, the holding of |
6507 | * because the list of events and the generation | 6512 | * parent_ctx->lock avoids it from being uncloned. |
6508 | * count can't have changed since we took the mutex. | ||
6509 | */ | 6513 | */ |
6510 | cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); | 6514 | cloned_ctx = parent_ctx->parent_ctx; |
6511 | if (cloned_ctx) { | 6515 | if (cloned_ctx) { |
6512 | child_ctx->parent_ctx = cloned_ctx; | 6516 | child_ctx->parent_ctx = cloned_ctx; |
6513 | child_ctx->parent_gen = parent_ctx->parent_gen; | 6517 | child_ctx->parent_gen = parent_ctx->parent_gen; |
@@ -6518,6 +6522,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn) | |||
6518 | get_ctx(child_ctx->parent_ctx); | 6522 | get_ctx(child_ctx->parent_ctx); |
6519 | } | 6523 | } |
6520 | 6524 | ||
6525 | raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); | ||
6521 | mutex_unlock(&parent_ctx->mutex); | 6526 | mutex_unlock(&parent_ctx->mutex); |
6522 | 6527 | ||
6523 | perf_unpin_context(parent_ctx); | 6528 | perf_unpin_context(parent_ctx); |
@@ -6532,6 +6537,10 @@ int perf_event_init_task(struct task_struct *child) | |||
6532 | { | 6537 | { |
6533 | int ctxn, ret; | 6538 | int ctxn, ret; |
6534 | 6539 | ||
6540 | memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp)); | ||
6541 | mutex_init(&child->perf_event_mutex); | ||
6542 | INIT_LIST_HEAD(&child->perf_event_list); | ||
6543 | |||
6535 | for_each_task_context_nr(ctxn) { | 6544 | for_each_task_context_nr(ctxn) { |
6536 | ret = perf_event_init_context(child, ctxn); | 6545 | ret = perf_event_init_context(child, ctxn); |
6537 | if (ret) | 6546 | if (ret) |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index a5aff3ebad38..265729966ece 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -100,13 +100,9 @@ config PM_SLEEP_ADVANCED_DEBUG | |||
100 | depends on PM_ADVANCED_DEBUG | 100 | depends on PM_ADVANCED_DEBUG |
101 | default n | 101 | default n |
102 | 102 | ||
103 | config SUSPEND_NVS | ||
104 | bool | ||
105 | |||
106 | config SUSPEND | 103 | config SUSPEND |
107 | bool "Suspend to RAM and standby" | 104 | bool "Suspend to RAM and standby" |
108 | depends on PM && ARCH_SUSPEND_POSSIBLE | 105 | depends on PM && ARCH_SUSPEND_POSSIBLE |
109 | select SUSPEND_NVS if HAS_IOMEM | ||
110 | default y | 106 | default y |
111 | ---help--- | 107 | ---help--- |
112 | Allow the system to enter sleep states in which main memory is | 108 | Allow the system to enter sleep states in which main memory is |
@@ -140,7 +136,6 @@ config HIBERNATION | |||
140 | depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE | 136 | depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE |
141 | select LZO_COMPRESS | 137 | select LZO_COMPRESS |
142 | select LZO_DECOMPRESS | 138 | select LZO_DECOMPRESS |
143 | select SUSPEND_NVS if HAS_IOMEM | ||
144 | ---help--- | 139 | ---help--- |
145 | Enable the suspend to disk (STD) functionality, which is usually | 140 | Enable the suspend to disk (STD) functionality, which is usually |
146 | called "hibernation" in user interfaces. STD checkpoints the | 141 | called "hibernation" in user interfaces. STD checkpoints the |
diff --git a/kernel/power/Makefile b/kernel/power/Makefile index b75597235d85..c350e18b53e3 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile | |||
@@ -7,6 +7,5 @@ obj-$(CONFIG_SUSPEND) += suspend.o | |||
7 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o | 7 | obj-$(CONFIG_PM_TEST_SUSPEND) += suspend_test.o |
8 | obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ | 8 | obj-$(CONFIG_HIBERNATION) += hibernate.o snapshot.o swap.o user.o \ |
9 | block_io.o | 9 | block_io.o |
10 | obj-$(CONFIG_SUSPEND_NVS) += nvs.o | ||
11 | 10 | ||
12 | obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o | 11 | obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o |
diff --git a/kernel/power/nvs.c b/kernel/power/nvs.c deleted file mode 100644 index 1836db60bbb6..000000000000 --- a/kernel/power/nvs.c +++ /dev/null | |||
@@ -1,136 +0,0 @@ | |||
1 | /* | ||
2 | * linux/kernel/power/hibernate_nvs.c - Routines for handling NVS memory | ||
3 | * | ||
4 | * Copyright (C) 2008,2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. | ||
5 | * | ||
6 | * This file is released under the GPLv2. | ||
7 | */ | ||
8 | |||
9 | #include <linux/io.h> | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/list.h> | ||
12 | #include <linux/mm.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/suspend.h> | ||
15 | |||
16 | /* | ||
17 | * Platforms, like ACPI, may want us to save some memory used by them during | ||
18 | * suspend and to restore the contents of this memory during the subsequent | ||
19 | * resume. The code below implements a mechanism allowing us to do that. | ||
20 | */ | ||
21 | |||
22 | struct nvs_page { | ||
23 | unsigned long phys_start; | ||
24 | unsigned int size; | ||
25 | void *kaddr; | ||
26 | void *data; | ||
27 | struct list_head node; | ||
28 | }; | ||
29 | |||
30 | static LIST_HEAD(nvs_list); | ||
31 | |||
32 | /** | ||
33 | * suspend_nvs_register - register platform NVS memory region to save | ||
34 | * @start - physical address of the region | ||
35 | * @size - size of the region | ||
36 | * | ||
37 | * The NVS region need not be page-aligned (both ends) and we arrange | ||
38 | * things so that the data from page-aligned addresses in this region will | ||
39 | * be copied into separate RAM pages. | ||
40 | */ | ||
41 | int suspend_nvs_register(unsigned long start, unsigned long size) | ||
42 | { | ||
43 | struct nvs_page *entry, *next; | ||
44 | |||
45 | while (size > 0) { | ||
46 | unsigned int nr_bytes; | ||
47 | |||
48 | entry = kzalloc(sizeof(struct nvs_page), GFP_KERNEL); | ||
49 | if (!entry) | ||
50 | goto Error; | ||
51 | |||
52 | list_add_tail(&entry->node, &nvs_list); | ||
53 | entry->phys_start = start; | ||
54 | nr_bytes = PAGE_SIZE - (start & ~PAGE_MASK); | ||
55 | entry->size = (size < nr_bytes) ? size : nr_bytes; | ||
56 | |||
57 | start += entry->size; | ||
58 | size -= entry->size; | ||
59 | } | ||
60 | return 0; | ||
61 | |||
62 | Error: | ||
63 | list_for_each_entry_safe(entry, next, &nvs_list, node) { | ||
64 | list_del(&entry->node); | ||
65 | kfree(entry); | ||
66 | } | ||
67 | return -ENOMEM; | ||
68 | } | ||
69 | |||
70 | /** | ||
71 | * suspend_nvs_free - free data pages allocated for saving NVS regions | ||
72 | */ | ||
73 | void suspend_nvs_free(void) | ||
74 | { | ||
75 | struct nvs_page *entry; | ||
76 | |||
77 | list_for_each_entry(entry, &nvs_list, node) | ||
78 | if (entry->data) { | ||
79 | free_page((unsigned long)entry->data); | ||
80 | entry->data = NULL; | ||
81 | if (entry->kaddr) { | ||
82 | iounmap(entry->kaddr); | ||
83 | entry->kaddr = NULL; | ||
84 | } | ||
85 | } | ||
86 | } | ||
87 | |||
88 | /** | ||
89 | * suspend_nvs_alloc - allocate memory necessary for saving NVS regions | ||
90 | */ | ||
91 | int suspend_nvs_alloc(void) | ||
92 | { | ||
93 | struct nvs_page *entry; | ||
94 | |||
95 | list_for_each_entry(entry, &nvs_list, node) { | ||
96 | entry->data = (void *)__get_free_page(GFP_KERNEL); | ||
97 | if (!entry->data) { | ||
98 | suspend_nvs_free(); | ||
99 | return -ENOMEM; | ||
100 | } | ||
101 | } | ||
102 | return 0; | ||
103 | } | ||
104 | |||
105 | /** | ||
106 | * suspend_nvs_save - save NVS memory regions | ||
107 | */ | ||
108 | void suspend_nvs_save(void) | ||
109 | { | ||
110 | struct nvs_page *entry; | ||
111 | |||
112 | printk(KERN_INFO "PM: Saving platform NVS memory\n"); | ||
113 | |||
114 | list_for_each_entry(entry, &nvs_list, node) | ||
115 | if (entry->data) { | ||
116 | entry->kaddr = ioremap(entry->phys_start, entry->size); | ||
117 | memcpy(entry->data, entry->kaddr, entry->size); | ||
118 | } | ||
119 | } | ||
120 | |||
121 | /** | ||
122 | * suspend_nvs_restore - restore NVS memory regions | ||
123 | * | ||
124 | * This function is going to be called with interrupts disabled, so it | ||
125 | * cannot iounmap the virtual addresses used to access the NVS region. | ||
126 | */ | ||
127 | void suspend_nvs_restore(void) | ||
128 | { | ||
129 | struct nvs_page *entry; | ||
130 | |||
131 | printk(KERN_INFO "PM: Restoring platform NVS memory\n"); | ||
132 | |||
133 | list_for_each_entry(entry, &nvs_list, node) | ||
134 | if (entry->data) | ||
135 | memcpy(entry->kaddr, entry->data, entry->size); | ||
136 | } | ||
diff --git a/kernel/printk.c b/kernel/printk.c index 53d9a9ec88e6..36231525e22f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c | |||
@@ -97,7 +97,7 @@ static int console_locked, console_suspended; | |||
97 | /* | 97 | /* |
98 | * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars | 98 | * logbuf_lock protects log_buf, log_start, log_end, con_start and logged_chars |
99 | * It is also used in interesting ways to provide interlocking in | 99 | * It is also used in interesting ways to provide interlocking in |
100 | * release_console_sem(). | 100 | * console_unlock();. |
101 | */ | 101 | */ |
102 | static DEFINE_SPINLOCK(logbuf_lock); | 102 | static DEFINE_SPINLOCK(logbuf_lock); |
103 | 103 | ||
@@ -262,25 +262,47 @@ int dmesg_restrict = 1; | |||
262 | int dmesg_restrict; | 262 | int dmesg_restrict; |
263 | #endif | 263 | #endif |
264 | 264 | ||
265 | static int syslog_action_restricted(int type) | ||
266 | { | ||
267 | if (dmesg_restrict) | ||
268 | return 1; | ||
269 | /* Unless restricted, we allow "read all" and "get buffer size" for everybody */ | ||
270 | return type != SYSLOG_ACTION_READ_ALL && type != SYSLOG_ACTION_SIZE_BUFFER; | ||
271 | } | ||
272 | |||
273 | static int check_syslog_permissions(int type, bool from_file) | ||
274 | { | ||
275 | /* | ||
276 | * If this is from /proc/kmsg and we've already opened it, then we've | ||
277 | * already done the capabilities checks at open time. | ||
278 | */ | ||
279 | if (from_file && type != SYSLOG_ACTION_OPEN) | ||
280 | return 0; | ||
281 | |||
282 | if (syslog_action_restricted(type)) { | ||
283 | if (capable(CAP_SYSLOG)) | ||
284 | return 0; | ||
285 | /* For historical reasons, accept CAP_SYS_ADMIN too, with a warning */ | ||
286 | if (capable(CAP_SYS_ADMIN)) { | ||
287 | WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " | ||
288 | "but no CAP_SYSLOG (deprecated).\n"); | ||
289 | return 0; | ||
290 | } | ||
291 | return -EPERM; | ||
292 | } | ||
293 | return 0; | ||
294 | } | ||
295 | |||
265 | int do_syslog(int type, char __user *buf, int len, bool from_file) | 296 | int do_syslog(int type, char __user *buf, int len, bool from_file) |
266 | { | 297 | { |
267 | unsigned i, j, limit, count; | 298 | unsigned i, j, limit, count; |
268 | int do_clear = 0; | 299 | int do_clear = 0; |
269 | char c; | 300 | char c; |
270 | int error = 0; | 301 | int error; |
271 | 302 | ||
272 | /* | 303 | error = check_syslog_permissions(type, from_file); |
273 | * If this is from /proc/kmsg we only do the capabilities checks | 304 | if (error) |
274 | * at open time. | 305 | goto out; |
275 | */ | ||
276 | if (type == SYSLOG_ACTION_OPEN || !from_file) { | ||
277 | if (dmesg_restrict && !capable(CAP_SYSLOG)) | ||
278 | goto warn; /* switch to return -EPERM after 2.6.39 */ | ||
279 | if ((type != SYSLOG_ACTION_READ_ALL && | ||
280 | type != SYSLOG_ACTION_SIZE_BUFFER) && | ||
281 | !capable(CAP_SYSLOG)) | ||
282 | goto warn; /* switch to return -EPERM after 2.6.39 */ | ||
283 | } | ||
284 | 306 | ||
285 | error = security_syslog(type); | 307 | error = security_syslog(type); |
286 | if (error) | 308 | if (error) |
@@ -423,12 +445,6 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) | |||
423 | } | 445 | } |
424 | out: | 446 | out: |
425 | return error; | 447 | return error; |
426 | warn: | ||
427 | /* remove after 2.6.39 */ | ||
428 | if (capable(CAP_SYS_ADMIN)) | ||
429 | WARN_ONCE(1, "Attempt to access syslog with CAP_SYS_ADMIN " | ||
430 | "but no CAP_SYSLOG (deprecated and denied).\n"); | ||
431 | return -EPERM; | ||
432 | } | 448 | } |
433 | 449 | ||
434 | SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) | 450 | SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) |
@@ -501,7 +517,7 @@ static void _call_console_drivers(unsigned start, | |||
501 | /* | 517 | /* |
502 | * Call the console drivers, asking them to write out | 518 | * Call the console drivers, asking them to write out |
503 | * log_buf[start] to log_buf[end - 1]. | 519 | * log_buf[start] to log_buf[end - 1]. |
504 | * The console_sem must be held. | 520 | * The console_lock must be held. |
505 | */ | 521 | */ |
506 | static void call_console_drivers(unsigned start, unsigned end) | 522 | static void call_console_drivers(unsigned start, unsigned end) |
507 | { | 523 | { |
@@ -604,11 +620,11 @@ static int have_callable_console(void) | |||
604 | * | 620 | * |
605 | * This is printk(). It can be called from any context. We want it to work. | 621 | * This is printk(). It can be called from any context. We want it to work. |
606 | * | 622 | * |
607 | * We try to grab the console_sem. If we succeed, it's easy - we log the output and | 623 | * We try to grab the console_lock. If we succeed, it's easy - we log the output and |
608 | * call the console drivers. If we fail to get the semaphore we place the output | 624 | * call the console drivers. If we fail to get the semaphore we place the output |
609 | * into the log buffer and return. The current holder of the console_sem will | 625 | * into the log buffer and return. The current holder of the console_sem will |
610 | * notice the new output in release_console_sem() and will send it to the | 626 | * notice the new output in console_unlock(); and will send it to the |
611 | * consoles before releasing the semaphore. | 627 | * consoles before releasing the lock. |
612 | * | 628 | * |
613 | * One effect of this deferred printing is that code which calls printk() and | 629 | * One effect of this deferred printing is that code which calls printk() and |
614 | * then changes console_loglevel may break. This is because console_loglevel | 630 | * then changes console_loglevel may break. This is because console_loglevel |
@@ -659,19 +675,19 @@ static inline int can_use_console(unsigned int cpu) | |||
659 | /* | 675 | /* |
660 | * Try to get console ownership to actually show the kernel | 676 | * Try to get console ownership to actually show the kernel |
661 | * messages from a 'printk'. Return true (and with the | 677 | * messages from a 'printk'. Return true (and with the |
662 | * console_semaphore held, and 'console_locked' set) if it | 678 | * console_lock held, and 'console_locked' set) if it |
663 | * is successful, false otherwise. | 679 | * is successful, false otherwise. |
664 | * | 680 | * |
665 | * This gets called with the 'logbuf_lock' spinlock held and | 681 | * This gets called with the 'logbuf_lock' spinlock held and |
666 | * interrupts disabled. It should return with 'lockbuf_lock' | 682 | * interrupts disabled. It should return with 'lockbuf_lock' |
667 | * released but interrupts still disabled. | 683 | * released but interrupts still disabled. |
668 | */ | 684 | */ |
669 | static int acquire_console_semaphore_for_printk(unsigned int cpu) | 685 | static int console_trylock_for_printk(unsigned int cpu) |
670 | __releases(&logbuf_lock) | 686 | __releases(&logbuf_lock) |
671 | { | 687 | { |
672 | int retval = 0; | 688 | int retval = 0; |
673 | 689 | ||
674 | if (!try_acquire_console_sem()) { | 690 | if (console_trylock()) { |
675 | retval = 1; | 691 | retval = 1; |
676 | 692 | ||
677 | /* | 693 | /* |
@@ -827,12 +843,12 @@ asmlinkage int vprintk(const char *fmt, va_list args) | |||
827 | * actual magic (print out buffers, wake up klogd, | 843 | * actual magic (print out buffers, wake up klogd, |
828 | * etc). | 844 | * etc). |
829 | * | 845 | * |
830 | * The acquire_console_semaphore_for_printk() function | 846 | * The console_trylock_for_printk() function |
831 | * will release 'logbuf_lock' regardless of whether it | 847 | * will release 'logbuf_lock' regardless of whether it |
832 | * actually gets the semaphore or not. | 848 | * actually gets the semaphore or not. |
833 | */ | 849 | */ |
834 | if (acquire_console_semaphore_for_printk(this_cpu)) | 850 | if (console_trylock_for_printk(this_cpu)) |
835 | release_console_sem(); | 851 | console_unlock(); |
836 | 852 | ||
837 | lockdep_on(); | 853 | lockdep_on(); |
838 | out_restore_irqs: | 854 | out_restore_irqs: |
@@ -993,7 +1009,7 @@ void suspend_console(void) | |||
993 | if (!console_suspend_enabled) | 1009 | if (!console_suspend_enabled) |
994 | return; | 1010 | return; |
995 | printk("Suspending console(s) (use no_console_suspend to debug)\n"); | 1011 | printk("Suspending console(s) (use no_console_suspend to debug)\n"); |
996 | acquire_console_sem(); | 1012 | console_lock(); |
997 | console_suspended = 1; | 1013 | console_suspended = 1; |
998 | up(&console_sem); | 1014 | up(&console_sem); |
999 | } | 1015 | } |
@@ -1004,7 +1020,7 @@ void resume_console(void) | |||
1004 | return; | 1020 | return; |
1005 | down(&console_sem); | 1021 | down(&console_sem); |
1006 | console_suspended = 0; | 1022 | console_suspended = 0; |
1007 | release_console_sem(); | 1023 | console_unlock(); |
1008 | } | 1024 | } |
1009 | 1025 | ||
1010 | /** | 1026 | /** |
@@ -1027,21 +1043,21 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self, | |||
1027 | case CPU_DYING: | 1043 | case CPU_DYING: |
1028 | case CPU_DOWN_FAILED: | 1044 | case CPU_DOWN_FAILED: |
1029 | case CPU_UP_CANCELED: | 1045 | case CPU_UP_CANCELED: |
1030 | acquire_console_sem(); | 1046 | console_lock(); |
1031 | release_console_sem(); | 1047 | console_unlock(); |
1032 | } | 1048 | } |
1033 | return NOTIFY_OK; | 1049 | return NOTIFY_OK; |
1034 | } | 1050 | } |
1035 | 1051 | ||
1036 | /** | 1052 | /** |
1037 | * acquire_console_sem - lock the console system for exclusive use. | 1053 | * console_lock - lock the console system for exclusive use. |
1038 | * | 1054 | * |
1039 | * Acquires a semaphore which guarantees that the caller has | 1055 | * Acquires a lock which guarantees that the caller has |
1040 | * exclusive access to the console system and the console_drivers list. | 1056 | * exclusive access to the console system and the console_drivers list. |
1041 | * | 1057 | * |
1042 | * Can sleep, returns nothing. | 1058 | * Can sleep, returns nothing. |
1043 | */ | 1059 | */ |
1044 | void acquire_console_sem(void) | 1060 | void console_lock(void) |
1045 | { | 1061 | { |
1046 | BUG_ON(in_interrupt()); | 1062 | BUG_ON(in_interrupt()); |
1047 | down(&console_sem); | 1063 | down(&console_sem); |
@@ -1050,21 +1066,29 @@ void acquire_console_sem(void) | |||
1050 | console_locked = 1; | 1066 | console_locked = 1; |
1051 | console_may_schedule = 1; | 1067 | console_may_schedule = 1; |
1052 | } | 1068 | } |
1053 | EXPORT_SYMBOL(acquire_console_sem); | 1069 | EXPORT_SYMBOL(console_lock); |
1054 | 1070 | ||
1055 | int try_acquire_console_sem(void) | 1071 | /** |
1072 | * console_trylock - try to lock the console system for exclusive use. | ||
1073 | * | ||
1074 | * Tried to acquire a lock which guarantees that the caller has | ||
1075 | * exclusive access to the console system and the console_drivers list. | ||
1076 | * | ||
1077 | * returns 1 on success, and 0 on failure to acquire the lock. | ||
1078 | */ | ||
1079 | int console_trylock(void) | ||
1056 | { | 1080 | { |
1057 | if (down_trylock(&console_sem)) | 1081 | if (down_trylock(&console_sem)) |
1058 | return -1; | 1082 | return 0; |
1059 | if (console_suspended) { | 1083 | if (console_suspended) { |
1060 | up(&console_sem); | 1084 | up(&console_sem); |
1061 | return -1; | 1085 | return 0; |
1062 | } | 1086 | } |
1063 | console_locked = 1; | 1087 | console_locked = 1; |
1064 | console_may_schedule = 0; | 1088 | console_may_schedule = 0; |
1065 | return 0; | 1089 | return 1; |
1066 | } | 1090 | } |
1067 | EXPORT_SYMBOL(try_acquire_console_sem); | 1091 | EXPORT_SYMBOL(console_trylock); |
1068 | 1092 | ||
1069 | int is_console_locked(void) | 1093 | int is_console_locked(void) |
1070 | { | 1094 | { |
@@ -1095,20 +1119,20 @@ void wake_up_klogd(void) | |||
1095 | } | 1119 | } |
1096 | 1120 | ||
1097 | /** | 1121 | /** |
1098 | * release_console_sem - unlock the console system | 1122 | * console_unlock - unlock the console system |
1099 | * | 1123 | * |
1100 | * Releases the semaphore which the caller holds on the console system | 1124 | * Releases the console_lock which the caller holds on the console system |
1101 | * and the console driver list. | 1125 | * and the console driver list. |
1102 | * | 1126 | * |
1103 | * While the semaphore was held, console output may have been buffered | 1127 | * While the console_lock was held, console output may have been buffered |
1104 | * by printk(). If this is the case, release_console_sem() emits | 1128 | * by printk(). If this is the case, console_unlock(); emits |
1105 | * the output prior to releasing the semaphore. | 1129 | * the output prior to releasing the lock. |
1106 | * | 1130 | * |
1107 | * If there is output waiting for klogd, we wake it up. | 1131 | * If there is output waiting for klogd, we wake it up. |
1108 | * | 1132 | * |
1109 | * release_console_sem() may be called from any context. | 1133 | * console_unlock(); may be called from any context. |
1110 | */ | 1134 | */ |
1111 | void release_console_sem(void) | 1135 | void console_unlock(void) |
1112 | { | 1136 | { |
1113 | unsigned long flags; | 1137 | unsigned long flags; |
1114 | unsigned _con_start, _log_end; | 1138 | unsigned _con_start, _log_end; |
@@ -1141,7 +1165,7 @@ void release_console_sem(void) | |||
1141 | if (wake_klogd) | 1165 | if (wake_klogd) |
1142 | wake_up_klogd(); | 1166 | wake_up_klogd(); |
1143 | } | 1167 | } |
1144 | EXPORT_SYMBOL(release_console_sem); | 1168 | EXPORT_SYMBOL(console_unlock); |
1145 | 1169 | ||
1146 | /** | 1170 | /** |
1147 | * console_conditional_schedule - yield the CPU if required | 1171 | * console_conditional_schedule - yield the CPU if required |
@@ -1150,7 +1174,7 @@ EXPORT_SYMBOL(release_console_sem); | |||
1150 | * if this CPU should yield the CPU to another task, do | 1174 | * if this CPU should yield the CPU to another task, do |
1151 | * so here. | 1175 | * so here. |
1152 | * | 1176 | * |
1153 | * Must be called within acquire_console_sem(). | 1177 | * Must be called within console_lock();. |
1154 | */ | 1178 | */ |
1155 | void __sched console_conditional_schedule(void) | 1179 | void __sched console_conditional_schedule(void) |
1156 | { | 1180 | { |
@@ -1171,14 +1195,14 @@ void console_unblank(void) | |||
1171 | if (down_trylock(&console_sem) != 0) | 1195 | if (down_trylock(&console_sem) != 0) |
1172 | return; | 1196 | return; |
1173 | } else | 1197 | } else |
1174 | acquire_console_sem(); | 1198 | console_lock(); |
1175 | 1199 | ||
1176 | console_locked = 1; | 1200 | console_locked = 1; |
1177 | console_may_schedule = 0; | 1201 | console_may_schedule = 0; |
1178 | for_each_console(c) | 1202 | for_each_console(c) |
1179 | if ((c->flags & CON_ENABLED) && c->unblank) | 1203 | if ((c->flags & CON_ENABLED) && c->unblank) |
1180 | c->unblank(); | 1204 | c->unblank(); |
1181 | release_console_sem(); | 1205 | console_unlock(); |
1182 | } | 1206 | } |
1183 | 1207 | ||
1184 | /* | 1208 | /* |
@@ -1189,7 +1213,7 @@ struct tty_driver *console_device(int *index) | |||
1189 | struct console *c; | 1213 | struct console *c; |
1190 | struct tty_driver *driver = NULL; | 1214 | struct tty_driver *driver = NULL; |
1191 | 1215 | ||
1192 | acquire_console_sem(); | 1216 | console_lock(); |
1193 | for_each_console(c) { | 1217 | for_each_console(c) { |
1194 | if (!c->device) | 1218 | if (!c->device) |
1195 | continue; | 1219 | continue; |
@@ -1197,7 +1221,7 @@ struct tty_driver *console_device(int *index) | |||
1197 | if (driver) | 1221 | if (driver) |
1198 | break; | 1222 | break; |
1199 | } | 1223 | } |
1200 | release_console_sem(); | 1224 | console_unlock(); |
1201 | return driver; | 1225 | return driver; |
1202 | } | 1226 | } |
1203 | 1227 | ||
@@ -1208,17 +1232,17 @@ struct tty_driver *console_device(int *index) | |||
1208 | */ | 1232 | */ |
1209 | void console_stop(struct console *console) | 1233 | void console_stop(struct console *console) |
1210 | { | 1234 | { |
1211 | acquire_console_sem(); | 1235 | console_lock(); |
1212 | console->flags &= ~CON_ENABLED; | 1236 | console->flags &= ~CON_ENABLED; |
1213 | release_console_sem(); | 1237 | console_unlock(); |
1214 | } | 1238 | } |
1215 | EXPORT_SYMBOL(console_stop); | 1239 | EXPORT_SYMBOL(console_stop); |
1216 | 1240 | ||
1217 | void console_start(struct console *console) | 1241 | void console_start(struct console *console) |
1218 | { | 1242 | { |
1219 | acquire_console_sem(); | 1243 | console_lock(); |
1220 | console->flags |= CON_ENABLED; | 1244 | console->flags |= CON_ENABLED; |
1221 | release_console_sem(); | 1245 | console_unlock(); |
1222 | } | 1246 | } |
1223 | EXPORT_SYMBOL(console_start); | 1247 | EXPORT_SYMBOL(console_start); |
1224 | 1248 | ||
@@ -1340,7 +1364,7 @@ void register_console(struct console *newcon) | |||
1340 | * Put this console in the list - keep the | 1364 | * Put this console in the list - keep the |
1341 | * preferred driver at the head of the list. | 1365 | * preferred driver at the head of the list. |
1342 | */ | 1366 | */ |
1343 | acquire_console_sem(); | 1367 | console_lock(); |
1344 | if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) { | 1368 | if ((newcon->flags & CON_CONSDEV) || console_drivers == NULL) { |
1345 | newcon->next = console_drivers; | 1369 | newcon->next = console_drivers; |
1346 | console_drivers = newcon; | 1370 | console_drivers = newcon; |
@@ -1352,14 +1376,14 @@ void register_console(struct console *newcon) | |||
1352 | } | 1376 | } |
1353 | if (newcon->flags & CON_PRINTBUFFER) { | 1377 | if (newcon->flags & CON_PRINTBUFFER) { |
1354 | /* | 1378 | /* |
1355 | * release_console_sem() will print out the buffered messages | 1379 | * console_unlock(); will print out the buffered messages |
1356 | * for us. | 1380 | * for us. |
1357 | */ | 1381 | */ |
1358 | spin_lock_irqsave(&logbuf_lock, flags); | 1382 | spin_lock_irqsave(&logbuf_lock, flags); |
1359 | con_start = log_start; | 1383 | con_start = log_start; |
1360 | spin_unlock_irqrestore(&logbuf_lock, flags); | 1384 | spin_unlock_irqrestore(&logbuf_lock, flags); |
1361 | } | 1385 | } |
1362 | release_console_sem(); | 1386 | console_unlock(); |
1363 | console_sysfs_notify(); | 1387 | console_sysfs_notify(); |
1364 | 1388 | ||
1365 | /* | 1389 | /* |
@@ -1396,7 +1420,7 @@ int unregister_console(struct console *console) | |||
1396 | return braille_unregister_console(console); | 1420 | return braille_unregister_console(console); |
1397 | #endif | 1421 | #endif |
1398 | 1422 | ||
1399 | acquire_console_sem(); | 1423 | console_lock(); |
1400 | if (console_drivers == console) { | 1424 | if (console_drivers == console) { |
1401 | console_drivers=console->next; | 1425 | console_drivers=console->next; |
1402 | res = 0; | 1426 | res = 0; |
@@ -1418,7 +1442,7 @@ int unregister_console(struct console *console) | |||
1418 | if (console_drivers != NULL && console->flags & CON_CONSDEV) | 1442 | if (console_drivers != NULL && console->flags & CON_CONSDEV) |
1419 | console_drivers->flags |= CON_CONSDEV; | 1443 | console_drivers->flags |= CON_CONSDEV; |
1420 | 1444 | ||
1421 | release_console_sem(); | 1445 | console_unlock(); |
1422 | console_sysfs_notify(); | 1446 | console_sysfs_notify(); |
1423 | return res; | 1447 | return res; |
1424 | } | 1448 | } |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 99bbaa3e5b0d..1708b1e2972d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -313,7 +313,7 @@ int ptrace_detach(struct task_struct *child, unsigned int data) | |||
313 | child->exit_code = data; | 313 | child->exit_code = data; |
314 | dead = __ptrace_detach(current, child); | 314 | dead = __ptrace_detach(current, child); |
315 | if (!child->exit_state) | 315 | if (!child->exit_state) |
316 | wake_up_process(child); | 316 | wake_up_state(child, TASK_TRACED | TASK_STOPPED); |
317 | } | 317 | } |
318 | write_unlock_irq(&tasklist_lock); | 318 | write_unlock_irq(&tasklist_lock); |
319 | 319 | ||
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 034493724749..0c343b9a46d5 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
@@ -189,7 +189,8 @@ static int rcu_kthread(void *arg) | |||
189 | unsigned long flags; | 189 | unsigned long flags; |
190 | 190 | ||
191 | for (;;) { | 191 | for (;;) { |
192 | wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); | 192 | wait_event_interruptible(rcu_kthread_wq, |
193 | have_rcu_kthread_work != 0); | ||
193 | morework = rcu_boost(); | 194 | morework = rcu_boost(); |
194 | local_irq_save(flags); | 195 | local_irq_save(flags); |
195 | work = have_rcu_kthread_work; | 196 | work = have_rcu_kthread_work; |
diff --git a/kernel/sched.c b/kernel/sched.c index ea3e5eff3878..18d38e4ec7ba 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -553,9 +553,6 @@ struct rq { | |||
553 | /* try_to_wake_up() stats */ | 553 | /* try_to_wake_up() stats */ |
554 | unsigned int ttwu_count; | 554 | unsigned int ttwu_count; |
555 | unsigned int ttwu_local; | 555 | unsigned int ttwu_local; |
556 | |||
557 | /* BKL stats */ | ||
558 | unsigned int bkl_count; | ||
559 | #endif | 556 | #endif |
560 | }; | 557 | }; |
561 | 558 | ||
@@ -609,6 +606,9 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
609 | struct task_group *tg; | 606 | struct task_group *tg; |
610 | struct cgroup_subsys_state *css; | 607 | struct cgroup_subsys_state *css; |
611 | 608 | ||
609 | if (p->flags & PF_EXITING) | ||
610 | return &root_task_group; | ||
611 | |||
612 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 612 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
613 | lockdep_is_held(&task_rq(p)->lock)); | 613 | lockdep_is_held(&task_rq(p)->lock)); |
614 | tg = container_of(css, struct task_group, css); | 614 | tg = container_of(css, struct task_group, css); |
@@ -3887,7 +3887,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
3887 | schedstat_inc(this_rq(), sched_count); | 3887 | schedstat_inc(this_rq(), sched_count); |
3888 | #ifdef CONFIG_SCHEDSTATS | 3888 | #ifdef CONFIG_SCHEDSTATS |
3889 | if (unlikely(prev->lock_depth >= 0)) { | 3889 | if (unlikely(prev->lock_depth >= 0)) { |
3890 | schedstat_inc(this_rq(), bkl_count); | 3890 | schedstat_inc(this_rq(), rq_sched_info.bkl_count); |
3891 | schedstat_inc(prev, sched_info.bkl_count); | 3891 | schedstat_inc(prev, sched_info.bkl_count); |
3892 | } | 3892 | } |
3893 | #endif | 3893 | #endif |
@@ -4871,7 +4871,8 @@ recheck: | |||
4871 | * assigned. | 4871 | * assigned. |
4872 | */ | 4872 | */ |
4873 | if (rt_bandwidth_enabled() && rt_policy(policy) && | 4873 | if (rt_bandwidth_enabled() && rt_policy(policy) && |
4874 | task_group(p)->rt_bandwidth.rt_runtime == 0) { | 4874 | task_group(p)->rt_bandwidth.rt_runtime == 0 && |
4875 | !task_group_is_autogroup(task_group(p))) { | ||
4875 | __task_rq_unlock(rq); | 4876 | __task_rq_unlock(rq); |
4876 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | 4877 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
4877 | return -EPERM; | 4878 | return -EPERM; |
@@ -8882,6 +8883,20 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
8882 | } | 8883 | } |
8883 | } | 8884 | } |
8884 | 8885 | ||
8886 | static void | ||
8887 | cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task) | ||
8888 | { | ||
8889 | /* | ||
8890 | * cgroup_exit() is called in the copy_process() failure path. | ||
8891 | * Ignore this case since the task hasn't ran yet, this avoids | ||
8892 | * trying to poke a half freed task state from generic code. | ||
8893 | */ | ||
8894 | if (!(task->flags & PF_EXITING)) | ||
8895 | return; | ||
8896 | |||
8897 | sched_move_task(task); | ||
8898 | } | ||
8899 | |||
8885 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8900 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8886 | static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, | 8901 | static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, |
8887 | u64 shareval) | 8902 | u64 shareval) |
@@ -8954,6 +8969,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { | |||
8954 | .destroy = cpu_cgroup_destroy, | 8969 | .destroy = cpu_cgroup_destroy, |
8955 | .can_attach = cpu_cgroup_can_attach, | 8970 | .can_attach = cpu_cgroup_can_attach, |
8956 | .attach = cpu_cgroup_attach, | 8971 | .attach = cpu_cgroup_attach, |
8972 | .exit = cpu_cgroup_exit, | ||
8957 | .populate = cpu_cgroup_populate, | 8973 | .populate = cpu_cgroup_populate, |
8958 | .subsys_id = cpu_cgroup_subsys_id, | 8974 | .subsys_id = cpu_cgroup_subsys_id, |
8959 | .early_init = 1, | 8975 | .early_init = 1, |
diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c index 32a723b8f84c..9fb656283157 100644 --- a/kernel/sched_autogroup.c +++ b/kernel/sched_autogroup.c | |||
@@ -27,6 +27,11 @@ static inline void autogroup_destroy(struct kref *kref) | |||
27 | { | 27 | { |
28 | struct autogroup *ag = container_of(kref, struct autogroup, kref); | 28 | struct autogroup *ag = container_of(kref, struct autogroup, kref); |
29 | 29 | ||
30 | #ifdef CONFIG_RT_GROUP_SCHED | ||
31 | /* We've redirected RT tasks to the root task group... */ | ||
32 | ag->tg->rt_se = NULL; | ||
33 | ag->tg->rt_rq = NULL; | ||
34 | #endif | ||
30 | sched_destroy_group(ag->tg); | 35 | sched_destroy_group(ag->tg); |
31 | } | 36 | } |
32 | 37 | ||
@@ -55,6 +60,10 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p) | |||
55 | return ag; | 60 | return ag; |
56 | } | 61 | } |
57 | 62 | ||
63 | #ifdef CONFIG_RT_GROUP_SCHED | ||
64 | static void free_rt_sched_group(struct task_group *tg); | ||
65 | #endif | ||
66 | |||
58 | static inline struct autogroup *autogroup_create(void) | 67 | static inline struct autogroup *autogroup_create(void) |
59 | { | 68 | { |
60 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); | 69 | struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); |
@@ -72,6 +81,19 @@ static inline struct autogroup *autogroup_create(void) | |||
72 | init_rwsem(&ag->lock); | 81 | init_rwsem(&ag->lock); |
73 | ag->id = atomic_inc_return(&autogroup_seq_nr); | 82 | ag->id = atomic_inc_return(&autogroup_seq_nr); |
74 | ag->tg = tg; | 83 | ag->tg = tg; |
84 | #ifdef CONFIG_RT_GROUP_SCHED | ||
85 | /* | ||
86 | * Autogroup RT tasks are redirected to the root task group | ||
87 | * so we don't have to move tasks around upon policy change, | ||
88 | * or flail around trying to allocate bandwidth on the fly. | ||
89 | * A bandwidth exception in __sched_setscheduler() allows | ||
90 | * the policy change to proceed. Thereafter, task_group() | ||
91 | * returns &root_task_group, so zero bandwidth is required. | ||
92 | */ | ||
93 | free_rt_sched_group(tg); | ||
94 | tg->rt_se = root_task_group.rt_se; | ||
95 | tg->rt_rq = root_task_group.rt_rq; | ||
96 | #endif | ||
75 | tg->autogroup = ag; | 97 | tg->autogroup = ag; |
76 | 98 | ||
77 | return ag; | 99 | return ag; |
@@ -106,6 +128,11 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg) | |||
106 | return true; | 128 | return true; |
107 | } | 129 | } |
108 | 130 | ||
131 | static inline bool task_group_is_autogroup(struct task_group *tg) | ||
132 | { | ||
133 | return tg != &root_task_group && tg->autogroup; | ||
134 | } | ||
135 | |||
109 | static inline struct task_group * | 136 | static inline struct task_group * |
110 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | 137 | autogroup_task_group(struct task_struct *p, struct task_group *tg) |
111 | { | 138 | { |
@@ -231,6 +258,11 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) | |||
231 | #ifdef CONFIG_SCHED_DEBUG | 258 | #ifdef CONFIG_SCHED_DEBUG |
232 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) | 259 | static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) |
233 | { | 260 | { |
261 | int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); | ||
262 | |||
263 | if (!enabled || !tg->autogroup) | ||
264 | return 0; | ||
265 | |||
234 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); | 266 | return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); |
235 | } | 267 | } |
236 | #endif /* CONFIG_SCHED_DEBUG */ | 268 | #endif /* CONFIG_SCHED_DEBUG */ |
diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h index 5358e241cb20..7b859ffe5dad 100644 --- a/kernel/sched_autogroup.h +++ b/kernel/sched_autogroup.h | |||
@@ -15,6 +15,10 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg); | |||
15 | 15 | ||
16 | static inline void autogroup_init(struct task_struct *init_task) { } | 16 | static inline void autogroup_init(struct task_struct *init_task) { } |
17 | static inline void autogroup_free(struct task_group *tg) { } | 17 | static inline void autogroup_free(struct task_group *tg) { } |
18 | static inline bool task_group_is_autogroup(struct task_group *tg) | ||
19 | { | ||
20 | return 0; | ||
21 | } | ||
18 | 22 | ||
19 | static inline struct task_group * | 23 | static inline struct task_group * |
20 | autogroup_task_group(struct task_struct *p, struct task_group *tg) | 24 | autogroup_task_group(struct task_struct *p, struct task_group *tg) |
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 1dfae3d014b5..eb6cb8edd075 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -16,6 +16,8 @@ | |||
16 | #include <linux/kallsyms.h> | 16 | #include <linux/kallsyms.h> |
17 | #include <linux/utsname.h> | 17 | #include <linux/utsname.h> |
18 | 18 | ||
19 | static DEFINE_SPINLOCK(sched_debug_lock); | ||
20 | |||
19 | /* | 21 | /* |
20 | * This allows printing both to /proc/sched_debug and | 22 | * This allows printing both to /proc/sched_debug and |
21 | * to the console | 23 | * to the console |
@@ -86,6 +88,26 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group | |||
86 | } | 88 | } |
87 | #endif | 89 | #endif |
88 | 90 | ||
91 | #ifdef CONFIG_CGROUP_SCHED | ||
92 | static char group_path[PATH_MAX]; | ||
93 | |||
94 | static char *task_group_path(struct task_group *tg) | ||
95 | { | ||
96 | if (autogroup_path(tg, group_path, PATH_MAX)) | ||
97 | return group_path; | ||
98 | |||
99 | /* | ||
100 | * May be NULL if the underlying cgroup isn't fully-created yet | ||
101 | */ | ||
102 | if (!tg->css.cgroup) { | ||
103 | group_path[0] = '\0'; | ||
104 | return group_path; | ||
105 | } | ||
106 | cgroup_path(tg->css.cgroup, group_path, PATH_MAX); | ||
107 | return group_path; | ||
108 | } | ||
109 | #endif | ||
110 | |||
89 | static void | 111 | static void |
90 | print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | 112 | print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) |
91 | { | 113 | { |
@@ -108,6 +130,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | |||
108 | SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld", | 130 | SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld", |
109 | 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); | 131 | 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); |
110 | #endif | 132 | #endif |
133 | #ifdef CONFIG_CGROUP_SCHED | ||
134 | SEQ_printf(m, " %s", task_group_path(task_group(p))); | ||
135 | #endif | ||
111 | 136 | ||
112 | SEQ_printf(m, "\n"); | 137 | SEQ_printf(m, "\n"); |
113 | } | 138 | } |
@@ -144,7 +169,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
144 | struct sched_entity *last; | 169 | struct sched_entity *last; |
145 | unsigned long flags; | 170 | unsigned long flags; |
146 | 171 | ||
172 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
173 | SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg)); | ||
174 | #else | ||
147 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); | 175 | SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); |
176 | #endif | ||
148 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", | 177 | SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", |
149 | SPLIT_NS(cfs_rq->exec_clock)); | 178 | SPLIT_NS(cfs_rq->exec_clock)); |
150 | 179 | ||
@@ -191,7 +220,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) | |||
191 | 220 | ||
192 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) | 221 | void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) |
193 | { | 222 | { |
223 | #ifdef CONFIG_RT_GROUP_SCHED | ||
224 | SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg)); | ||
225 | #else | ||
194 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); | 226 | SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); |
227 | #endif | ||
195 | 228 | ||
196 | #define P(x) \ | 229 | #define P(x) \ |
197 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) | 230 | SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) |
@@ -212,6 +245,7 @@ extern __read_mostly int sched_clock_running; | |||
212 | static void print_cpu(struct seq_file *m, int cpu) | 245 | static void print_cpu(struct seq_file *m, int cpu) |
213 | { | 246 | { |
214 | struct rq *rq = cpu_rq(cpu); | 247 | struct rq *rq = cpu_rq(cpu); |
248 | unsigned long flags; | ||
215 | 249 | ||
216 | #ifdef CONFIG_X86 | 250 | #ifdef CONFIG_X86 |
217 | { | 251 | { |
@@ -262,14 +296,20 @@ static void print_cpu(struct seq_file *m, int cpu) | |||
262 | P(ttwu_count); | 296 | P(ttwu_count); |
263 | P(ttwu_local); | 297 | P(ttwu_local); |
264 | 298 | ||
265 | P(bkl_count); | 299 | SEQ_printf(m, " .%-30s: %d\n", "bkl_count", |
300 | rq->rq_sched_info.bkl_count); | ||
266 | 301 | ||
267 | #undef P | 302 | #undef P |
303 | #undef P64 | ||
268 | #endif | 304 | #endif |
305 | spin_lock_irqsave(&sched_debug_lock, flags); | ||
269 | print_cfs_stats(m, cpu); | 306 | print_cfs_stats(m, cpu); |
270 | print_rt_stats(m, cpu); | 307 | print_rt_stats(m, cpu); |
271 | 308 | ||
309 | rcu_read_lock(); | ||
272 | print_rq(m, rq, cpu); | 310 | print_rq(m, rq, cpu); |
311 | rcu_read_unlock(); | ||
312 | spin_unlock_irqrestore(&sched_debug_lock, flags); | ||
273 | } | 313 | } |
274 | 314 | ||
275 | static const char *sched_tunable_scaling_names[] = { | 315 | static const char *sched_tunable_scaling_names[] = { |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c62ebae65cf0..0c26e2df450e 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -699,7 +699,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
699 | cfs_rq->nr_running--; | 699 | cfs_rq->nr_running--; |
700 | } | 700 | } |
701 | 701 | ||
702 | #if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED | 702 | #ifdef CONFIG_FAIR_GROUP_SCHED |
703 | # ifdef CONFIG_SMP | ||
703 | static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, | 704 | static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, |
704 | int global_update) | 705 | int global_update) |
705 | { | 706 | { |
@@ -721,10 +722,10 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
721 | u64 now, delta; | 722 | u64 now, delta; |
722 | unsigned long load = cfs_rq->load.weight; | 723 | unsigned long load = cfs_rq->load.weight; |
723 | 724 | ||
724 | if (!cfs_rq) | 725 | if (cfs_rq->tg == &root_task_group) |
725 | return; | 726 | return; |
726 | 727 | ||
727 | now = rq_of(cfs_rq)->clock; | 728 | now = rq_of(cfs_rq)->clock_task; |
728 | delta = now - cfs_rq->load_stamp; | 729 | delta = now - cfs_rq->load_stamp; |
729 | 730 | ||
730 | /* truncate load history at 4 idle periods */ | 731 | /* truncate load history at 4 idle periods */ |
@@ -762,6 +763,51 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
762 | list_del_leaf_cfs_rq(cfs_rq); | 763 | list_del_leaf_cfs_rq(cfs_rq); |
763 | } | 764 | } |
764 | 765 | ||
766 | static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, | ||
767 | long weight_delta) | ||
768 | { | ||
769 | long load_weight, load, shares; | ||
770 | |||
771 | load = cfs_rq->load.weight + weight_delta; | ||
772 | |||
773 | load_weight = atomic_read(&tg->load_weight); | ||
774 | load_weight -= cfs_rq->load_contribution; | ||
775 | load_weight += load; | ||
776 | |||
777 | shares = (tg->shares * load); | ||
778 | if (load_weight) | ||
779 | shares /= load_weight; | ||
780 | |||
781 | if (shares < MIN_SHARES) | ||
782 | shares = MIN_SHARES; | ||
783 | if (shares > tg->shares) | ||
784 | shares = tg->shares; | ||
785 | |||
786 | return shares; | ||
787 | } | ||
788 | |||
789 | static void update_entity_shares_tick(struct cfs_rq *cfs_rq) | ||
790 | { | ||
791 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { | ||
792 | update_cfs_load(cfs_rq, 0); | ||
793 | update_cfs_shares(cfs_rq, 0); | ||
794 | } | ||
795 | } | ||
796 | # else /* CONFIG_SMP */ | ||
797 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | ||
798 | { | ||
799 | } | ||
800 | |||
801 | static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, | ||
802 | long weight_delta) | ||
803 | { | ||
804 | return tg->shares; | ||
805 | } | ||
806 | |||
807 | static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq) | ||
808 | { | ||
809 | } | ||
810 | # endif /* CONFIG_SMP */ | ||
765 | static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, | 811 | static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, |
766 | unsigned long weight) | 812 | unsigned long weight) |
767 | { | 813 | { |
@@ -782,41 +828,20 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | |||
782 | { | 828 | { |
783 | struct task_group *tg; | 829 | struct task_group *tg; |
784 | struct sched_entity *se; | 830 | struct sched_entity *se; |
785 | long load_weight, load, shares; | 831 | long shares; |
786 | |||
787 | if (!cfs_rq) | ||
788 | return; | ||
789 | 832 | ||
790 | tg = cfs_rq->tg; | 833 | tg = cfs_rq->tg; |
791 | se = tg->se[cpu_of(rq_of(cfs_rq))]; | 834 | se = tg->se[cpu_of(rq_of(cfs_rq))]; |
792 | if (!se) | 835 | if (!se) |
793 | return; | 836 | return; |
794 | 837 | #ifndef CONFIG_SMP | |
795 | load = cfs_rq->load.weight + weight_delta; | 838 | if (likely(se->load.weight == tg->shares)) |
796 | 839 | return; | |
797 | load_weight = atomic_read(&tg->load_weight); | 840 | #endif |
798 | load_weight -= cfs_rq->load_contribution; | 841 | shares = calc_cfs_shares(cfs_rq, tg, weight_delta); |
799 | load_weight += load; | ||
800 | |||
801 | shares = (tg->shares * load); | ||
802 | if (load_weight) | ||
803 | shares /= load_weight; | ||
804 | |||
805 | if (shares < MIN_SHARES) | ||
806 | shares = MIN_SHARES; | ||
807 | if (shares > tg->shares) | ||
808 | shares = tg->shares; | ||
809 | 842 | ||
810 | reweight_entity(cfs_rq_of(se), se, shares); | 843 | reweight_entity(cfs_rq_of(se), se, shares); |
811 | } | 844 | } |
812 | |||
813 | static void update_entity_shares_tick(struct cfs_rq *cfs_rq) | ||
814 | { | ||
815 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { | ||
816 | update_cfs_load(cfs_rq, 0); | ||
817 | update_cfs_shares(cfs_rq, 0); | ||
818 | } | ||
819 | } | ||
820 | #else /* CONFIG_FAIR_GROUP_SCHED */ | 845 | #else /* CONFIG_FAIR_GROUP_SCHED */ |
821 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | 846 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) |
822 | { | 847 | { |
@@ -1062,6 +1087,9 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
1062 | struct sched_entity *se = __pick_next_entity(cfs_rq); | 1087 | struct sched_entity *se = __pick_next_entity(cfs_rq); |
1063 | s64 delta = curr->vruntime - se->vruntime; | 1088 | s64 delta = curr->vruntime - se->vruntime; |
1064 | 1089 | ||
1090 | if (delta < 0) | ||
1091 | return; | ||
1092 | |||
1065 | if (delta > ideal_runtime) | 1093 | if (delta > ideal_runtime) |
1066 | resched_task(rq_of(cfs_rq)->curr); | 1094 | resched_task(rq_of(cfs_rq)->curr); |
1067 | } | 1095 | } |
@@ -1362,27 +1390,27 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) | |||
1362 | return wl; | 1390 | return wl; |
1363 | 1391 | ||
1364 | for_each_sched_entity(se) { | 1392 | for_each_sched_entity(se) { |
1365 | long S, rw, s, a, b; | 1393 | long lw, w; |
1366 | 1394 | ||
1367 | S = se->my_q->tg->shares; | 1395 | tg = se->my_q->tg; |
1368 | s = se->load.weight; | 1396 | w = se->my_q->load.weight; |
1369 | rw = se->my_q->load.weight; | ||
1370 | 1397 | ||
1371 | a = S*(rw + wl); | 1398 | /* use this cpu's instantaneous contribution */ |
1372 | b = S*rw + s*wg; | 1399 | lw = atomic_read(&tg->load_weight); |
1400 | lw -= se->my_q->load_contribution; | ||
1401 | lw += w + wg; | ||
1373 | 1402 | ||
1374 | wl = s*(a-b); | 1403 | wl += w; |
1375 | 1404 | ||
1376 | if (likely(b)) | 1405 | if (lw > 0 && wl < lw) |
1377 | wl /= b; | 1406 | wl = (wl * tg->shares) / lw; |
1407 | else | ||
1408 | wl = tg->shares; | ||
1378 | 1409 | ||
1379 | /* | 1410 | /* zero point is MIN_SHARES */ |
1380 | * Assume the group is already running and will | 1411 | if (wl < MIN_SHARES) |
1381 | * thus already be accounted for in the weight. | 1412 | wl = MIN_SHARES; |
1382 | * | 1413 | wl -= se->load.weight; |
1383 | * That is, moving shares between CPUs, does not | ||
1384 | * alter the group weight. | ||
1385 | */ | ||
1386 | wg = 0; | 1414 | wg = 0; |
1387 | } | 1415 | } |
1388 | 1416 | ||
@@ -1401,7 +1429,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, | |||
1401 | 1429 | ||
1402 | static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | 1430 | static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) |
1403 | { | 1431 | { |
1404 | unsigned long this_load, load; | 1432 | s64 this_load, load; |
1405 | int idx, this_cpu, prev_cpu; | 1433 | int idx, this_cpu, prev_cpu; |
1406 | unsigned long tl_per_task; | 1434 | unsigned long tl_per_task; |
1407 | struct task_group *tg; | 1435 | struct task_group *tg; |
@@ -1440,8 +1468,8 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) | |||
1440 | * Otherwise check if either cpus are near enough in load to allow this | 1468 | * Otherwise check if either cpus are near enough in load to allow this |
1441 | * task to be woken on this_cpu. | 1469 | * task to be woken on this_cpu. |
1442 | */ | 1470 | */ |
1443 | if (this_load) { | 1471 | if (this_load > 0) { |
1444 | unsigned long this_eff_load, prev_eff_load; | 1472 | s64 this_eff_load, prev_eff_load; |
1445 | 1473 | ||
1446 | this_eff_load = 100; | 1474 | this_eff_load = 100; |
1447 | this_eff_load *= power_of(prev_cpu); | 1475 | this_eff_load *= power_of(prev_cpu); |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index c914ec747ca6..ad6267714c84 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -625,7 +625,7 @@ static void update_curr_rt(struct rq *rq) | |||
625 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | 625 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); |
626 | u64 delta_exec; | 626 | u64 delta_exec; |
627 | 627 | ||
628 | if (!task_has_rt_policy(curr)) | 628 | if (curr->sched_class != &rt_sched_class) |
629 | return; | 629 | return; |
630 | 630 | ||
631 | delta_exec = rq->clock_task - curr->se.exec_start; | 631 | delta_exec = rq->clock_task - curr->se.exec_start; |
diff --git a/kernel/smp.c b/kernel/smp.c index 4ec30e069987..9910744f0856 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -194,23 +194,52 @@ void generic_smp_call_function_interrupt(void) | |||
194 | */ | 194 | */ |
195 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { | 195 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { |
196 | int refs; | 196 | int refs; |
197 | void (*func) (void *info); | ||
197 | 198 | ||
198 | if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) | 199 | /* |
200 | * Since we walk the list without any locks, we might | ||
201 | * see an entry that was completed, removed from the | ||
202 | * list and is in the process of being reused. | ||
203 | * | ||
204 | * We must check that the cpu is in the cpumask before | ||
205 | * checking the refs, and both must be set before | ||
206 | * executing the callback on this cpu. | ||
207 | */ | ||
208 | |||
209 | if (!cpumask_test_cpu(cpu, data->cpumask)) | ||
210 | continue; | ||
211 | |||
212 | smp_rmb(); | ||
213 | |||
214 | if (atomic_read(&data->refs) == 0) | ||
199 | continue; | 215 | continue; |
200 | 216 | ||
217 | func = data->csd.func; /* for later warn */ | ||
201 | data->csd.func(data->csd.info); | 218 | data->csd.func(data->csd.info); |
202 | 219 | ||
220 | /* | ||
221 | * If the cpu mask is not still set then it enabled interrupts, | ||
222 | * we took another smp interrupt, and executed the function | ||
223 | * twice on this cpu. In theory that copy decremented refs. | ||
224 | */ | ||
225 | if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) { | ||
226 | WARN(1, "%pS enabled interrupts and double executed\n", | ||
227 | func); | ||
228 | continue; | ||
229 | } | ||
230 | |||
203 | refs = atomic_dec_return(&data->refs); | 231 | refs = atomic_dec_return(&data->refs); |
204 | WARN_ON(refs < 0); | 232 | WARN_ON(refs < 0); |
205 | if (!refs) { | ||
206 | raw_spin_lock(&call_function.lock); | ||
207 | list_del_rcu(&data->csd.list); | ||
208 | raw_spin_unlock(&call_function.lock); | ||
209 | } | ||
210 | 233 | ||
211 | if (refs) | 234 | if (refs) |
212 | continue; | 235 | continue; |
213 | 236 | ||
237 | WARN_ON(!cpumask_empty(data->cpumask)); | ||
238 | |||
239 | raw_spin_lock(&call_function.lock); | ||
240 | list_del_rcu(&data->csd.list); | ||
241 | raw_spin_unlock(&call_function.lock); | ||
242 | |||
214 | csd_unlock(&data->csd); | 243 | csd_unlock(&data->csd); |
215 | } | 244 | } |
216 | 245 | ||
@@ -430,7 +459,7 @@ void smp_call_function_many(const struct cpumask *mask, | |||
430 | * can't happen. | 459 | * can't happen. |
431 | */ | 460 | */ |
432 | WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() | 461 | WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled() |
433 | && !oops_in_progress); | 462 | && !oops_in_progress && !early_boot_irqs_disabled); |
434 | 463 | ||
435 | /* So, what's a CPU they want? Ignoring this one. */ | 464 | /* So, what's a CPU they want? Ignoring this one. */ |
436 | cpu = cpumask_first_and(mask, cpu_online_mask); | 465 | cpu = cpumask_first_and(mask, cpu_online_mask); |
@@ -454,11 +483,21 @@ void smp_call_function_many(const struct cpumask *mask, | |||
454 | 483 | ||
455 | data = &__get_cpu_var(cfd_data); | 484 | data = &__get_cpu_var(cfd_data); |
456 | csd_lock(&data->csd); | 485 | csd_lock(&data->csd); |
486 | BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask)); | ||
457 | 487 | ||
458 | data->csd.func = func; | 488 | data->csd.func = func; |
459 | data->csd.info = info; | 489 | data->csd.info = info; |
460 | cpumask_and(data->cpumask, mask, cpu_online_mask); | 490 | cpumask_and(data->cpumask, mask, cpu_online_mask); |
461 | cpumask_clear_cpu(this_cpu, data->cpumask); | 491 | cpumask_clear_cpu(this_cpu, data->cpumask); |
492 | |||
493 | /* | ||
494 | * To ensure the interrupt handler gets an complete view | ||
495 | * we order the cpumask and refs writes and order the read | ||
496 | * of them in the interrupt handler. In addition we may | ||
497 | * only clear our own cpu bit from the mask. | ||
498 | */ | ||
499 | smp_wmb(); | ||
500 | |||
462 | atomic_set(&data->refs, cpumask_weight(data->cpumask)); | 501 | atomic_set(&data->refs, cpumask_weight(data->cpumask)); |
463 | 502 | ||
464 | raw_spin_lock_irqsave(&call_function.lock, flags); | 503 | raw_spin_lock_irqsave(&call_function.lock, flags); |
@@ -533,17 +572,20 @@ void ipi_call_unlock_irq(void) | |||
533 | #endif /* USE_GENERIC_SMP_HELPERS */ | 572 | #endif /* USE_GENERIC_SMP_HELPERS */ |
534 | 573 | ||
535 | /* | 574 | /* |
536 | * Call a function on all processors | 575 | * Call a function on all processors. May be used during early boot while |
576 | * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead | ||
577 | * of local_irq_disable/enable(). | ||
537 | */ | 578 | */ |
538 | int on_each_cpu(void (*func) (void *info), void *info, int wait) | 579 | int on_each_cpu(void (*func) (void *info), void *info, int wait) |
539 | { | 580 | { |
581 | unsigned long flags; | ||
540 | int ret = 0; | 582 | int ret = 0; |
541 | 583 | ||
542 | preempt_disable(); | 584 | preempt_disable(); |
543 | ret = smp_call_function(func, info, wait); | 585 | ret = smp_call_function(func, info, wait); |
544 | local_irq_disable(); | 586 | local_irq_save(flags); |
545 | func(info); | 587 | func(info); |
546 | local_irq_enable(); | 588 | local_irq_restore(flags); |
547 | preempt_enable(); | 589 | preempt_enable(); |
548 | return ret; | 590 | return ret; |
549 | } | 591 | } |
diff --git a/kernel/srcu.c b/kernel/srcu.c index 98d8c1e80edb..73ce23feaea9 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
@@ -156,6 +156,16 @@ void __srcu_read_unlock(struct srcu_struct *sp, int idx) | |||
156 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); | 156 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); |
157 | 157 | ||
158 | /* | 158 | /* |
159 | * We use an adaptive strategy for synchronize_srcu() and especially for | ||
160 | * synchronize_srcu_expedited(). We spin for a fixed time period | ||
161 | * (defined below) to allow SRCU readers to exit their read-side critical | ||
162 | * sections. If there are still some readers after 10 microseconds, | ||
163 | * we repeatedly block for 1-millisecond time periods. This approach | ||
164 | * has done well in testing, so there is no need for a config parameter. | ||
165 | */ | ||
166 | #define SYNCHRONIZE_SRCU_READER_DELAY 10 | ||
167 | |||
168 | /* | ||
159 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). | 169 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). |
160 | */ | 170 | */ |
161 | static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | 171 | static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) |
@@ -207,11 +217,12 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | |||
207 | * will have finished executing. We initially give readers | 217 | * will have finished executing. We initially give readers |
208 | * an arbitrarily chosen 10 microseconds to get out of their | 218 | * an arbitrarily chosen 10 microseconds to get out of their |
209 | * SRCU read-side critical sections, then loop waiting 1/HZ | 219 | * SRCU read-side critical sections, then loop waiting 1/HZ |
210 | * seconds per iteration. | 220 | * seconds per iteration. The 10-microsecond value has done |
221 | * very well in testing. | ||
211 | */ | 222 | */ |
212 | 223 | ||
213 | if (srcu_readers_active_idx(sp, idx)) | 224 | if (srcu_readers_active_idx(sp, idx)) |
214 | udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY); | 225 | udelay(SYNCHRONIZE_SRCU_READER_DELAY); |
215 | while (srcu_readers_active_idx(sp, idx)) | 226 | while (srcu_readers_active_idx(sp, idx)) |
216 | schedule_timeout_interruptible(1); | 227 | schedule_timeout_interruptible(1); |
217 | 228 | ||
diff --git a/kernel/sys.c b/kernel/sys.c index 31b71a276b40..18da702ec813 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -1385,7 +1385,8 @@ static int check_prlimit_permission(struct task_struct *task) | |||
1385 | const struct cred *cred = current_cred(), *tcred; | 1385 | const struct cred *cred = current_cred(), *tcred; |
1386 | 1386 | ||
1387 | tcred = __task_cred(task); | 1387 | tcred = __task_cred(task); |
1388 | if ((cred->uid != tcred->euid || | 1388 | if (current != task && |
1389 | (cred->uid != tcred->euid || | ||
1389 | cred->uid != tcred->suid || | 1390 | cred->uid != tcred->suid || |
1390 | cred->uid != tcred->uid || | 1391 | cred->uid != tcred->uid || |
1391 | cred->gid != tcred->egid || | 1392 | cred->gid != tcred->egid || |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bc86bb32e126..0f1bd83db985 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -170,7 +170,8 @@ static int proc_taint(struct ctl_table *table, int write, | |||
170 | #endif | 170 | #endif |
171 | 171 | ||
172 | #ifdef CONFIG_MAGIC_SYSRQ | 172 | #ifdef CONFIG_MAGIC_SYSRQ |
173 | static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */ | 173 | /* Note: sysrq code uses it's own private copy */ |
174 | static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; | ||
174 | 175 | ||
175 | static int sysrq_sysctl_handler(ctl_table *table, int write, | 176 | static int sysrq_sysctl_handler(ctl_table *table, int write, |
176 | void __user *buffer, size_t *lenp, | 177 | void __user *buffer, size_t *lenp, |
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c50a034de30f..6519cf62d9cd 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -113,7 +113,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time); | |||
113 | * @shift: pointer to shift variable | 113 | * @shift: pointer to shift variable |
114 | * @from: frequency to convert from | 114 | * @from: frequency to convert from |
115 | * @to: frequency to convert to | 115 | * @to: frequency to convert to |
116 | * @minsec: guaranteed runtime conversion range in seconds | 116 | * @maxsec: guaranteed runtime conversion range in seconds |
117 | * | 117 | * |
118 | * The function evaluates the shift/mult pair for the scaled math | 118 | * The function evaluates the shift/mult pair for the scaled math |
119 | * operations of clocksources and clockevents. | 119 | * operations of clocksources and clockevents. |
@@ -122,7 +122,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time); | |||
122 | * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock | 122 | * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock |
123 | * event @to is the counter frequency and @from is NSEC_PER_SEC. | 123 | * event @to is the counter frequency and @from is NSEC_PER_SEC. |
124 | * | 124 | * |
125 | * The @minsec conversion range argument controls the time frame in | 125 | * The @maxsec conversion range argument controls the time frame in |
126 | * seconds which must be covered by the runtime conversion with the | 126 | * seconds which must be covered by the runtime conversion with the |
127 | * calculated mult and shift factors. This guarantees that no 64bit | 127 | * calculated mult and shift factors. This guarantees that no 64bit |
128 | * overflow happens when the input value of the conversion is | 128 | * overflow happens when the input value of the conversion is |
@@ -131,7 +131,7 @@ EXPORT_SYMBOL_GPL(timecounter_cyc2time); | |||
131 | * factors. | 131 | * factors. |
132 | */ | 132 | */ |
133 | void | 133 | void |
134 | clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec) | 134 | clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec) |
135 | { | 135 | { |
136 | u64 tmp; | 136 | u64 tmp; |
137 | u32 sft, sftacc= 32; | 137 | u32 sft, sftacc= 32; |
@@ -140,7 +140,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec) | |||
140 | * Calculate the shift factor which is limiting the conversion | 140 | * Calculate the shift factor which is limiting the conversion |
141 | * range: | 141 | * range: |
142 | */ | 142 | */ |
143 | tmp = ((u64)minsec * from) >> 32; | 143 | tmp = ((u64)maxsec * from) >> 32; |
144 | while (tmp) { | 144 | while (tmp) { |
145 | tmp >>=1; | 145 | tmp >>=1; |
146 | sftacc--; | 146 | sftacc--; |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3e216e01bbd1..c55ea2433471 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -642,8 +642,7 @@ static void tick_nohz_switch_to_nohz(void) | |||
642 | } | 642 | } |
643 | local_irq_enable(); | 643 | local_irq_enable(); |
644 | 644 | ||
645 | printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", | 645 | printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); |
646 | smp_processor_id()); | ||
647 | } | 646 | } |
648 | 647 | ||
649 | /* | 648 | /* |
@@ -795,8 +794,10 @@ void tick_setup_sched_timer(void) | |||
795 | } | 794 | } |
796 | 795 | ||
797 | #ifdef CONFIG_NO_HZ | 796 | #ifdef CONFIG_NO_HZ |
798 | if (tick_nohz_enabled) | 797 | if (tick_nohz_enabled) { |
799 | ts->nohz_mode = NOHZ_MODE_HIGHRES; | 798 | ts->nohz_mode = NOHZ_MODE_HIGHRES; |
799 | printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); | ||
800 | } | ||
800 | #endif | 801 | #endif |
801 | } | 802 | } |
802 | #endif /* HIGH_RES_TIMERS */ | 803 | #endif /* HIGH_RES_TIMERS */ |
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 5536aaf3ba36..d27c7562902c 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -49,7 +49,7 @@ struct timekeeper { | |||
49 | u32 mult; | 49 | u32 mult; |
50 | }; | 50 | }; |
51 | 51 | ||
52 | struct timekeeper timekeeper; | 52 | static struct timekeeper timekeeper; |
53 | 53 | ||
54 | /** | 54 | /** |
55 | * timekeeper_setup_internals - Set up internals to use clocksource clock. | 55 | * timekeeper_setup_internals - Set up internals to use clocksource clock. |
@@ -164,7 +164,7 @@ static struct timespec total_sleep_time; | |||
164 | /* | 164 | /* |
165 | * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. | 165 | * The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. |
166 | */ | 166 | */ |
167 | struct timespec raw_time; | 167 | static struct timespec raw_time; |
168 | 168 | ||
169 | /* flag for if timekeeping is suspended */ | 169 | /* flag for if timekeeping is suspended */ |
170 | int __read_mostly timekeeping_suspended; | 170 | int __read_mostly timekeeping_suspended; |
diff --git a/kernel/timer.c b/kernel/timer.c index 43ca9936f2d0..d53ce66daea0 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -969,10 +969,14 @@ EXPORT_SYMBOL(try_to_del_timer_sync); | |||
969 | int del_timer_sync(struct timer_list *timer) | 969 | int del_timer_sync(struct timer_list *timer) |
970 | { | 970 | { |
971 | #ifdef CONFIG_LOCKDEP | 971 | #ifdef CONFIG_LOCKDEP |
972 | unsigned long flags; | ||
973 | |||
974 | raw_local_irq_save(flags); | ||
972 | local_bh_disable(); | 975 | local_bh_disable(); |
973 | lock_map_acquire(&timer->lockdep_map); | 976 | lock_map_acquire(&timer->lockdep_map); |
974 | lock_map_release(&timer->lockdep_map); | 977 | lock_map_release(&timer->lockdep_map); |
975 | local_bh_enable(); | 978 | _local_bh_enable(); |
979 | raw_local_irq_restore(flags); | ||
976 | #endif | 980 | #endif |
977 | /* | 981 | /* |
978 | * don't use it in hardirq context, because it | 982 | * don't use it in hardirq context, because it |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 153562d0b93c..d95721f33702 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -138,6 +138,13 @@ void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) | |||
138 | !blk_tracer_enabled)) | 138 | !blk_tracer_enabled)) |
139 | return; | 139 | return; |
140 | 140 | ||
141 | /* | ||
142 | * If the BLK_TC_NOTIFY action mask isn't set, don't send any note | ||
143 | * message to the trace. | ||
144 | */ | ||
145 | if (!(bt->act_mask & BLK_TC_NOTIFY)) | ||
146 | return; | ||
147 | |||
141 | local_irq_save(flags); | 148 | local_irq_save(flags); |
142 | buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); | 149 | buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); |
143 | va_start(args, fmt); | 150 | va_start(args, fmt); |
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 35fde09b81de..5f499e0438a4 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
@@ -1284,7 +1284,7 @@ trace_create_file_ops(struct module *mod) | |||
1284 | static void trace_module_add_events(struct module *mod) | 1284 | static void trace_module_add_events(struct module *mod) |
1285 | { | 1285 | { |
1286 | struct ftrace_module_file_ops *file_ops = NULL; | 1286 | struct ftrace_module_file_ops *file_ops = NULL; |
1287 | struct ftrace_event_call *call, *start, *end; | 1287 | struct ftrace_event_call **call, **start, **end; |
1288 | 1288 | ||
1289 | start = mod->trace_events; | 1289 | start = mod->trace_events; |
1290 | end = mod->trace_events + mod->num_trace_events; | 1290 | end = mod->trace_events + mod->num_trace_events; |
@@ -1297,7 +1297,7 @@ static void trace_module_add_events(struct module *mod) | |||
1297 | return; | 1297 | return; |
1298 | 1298 | ||
1299 | for_each_event(call, start, end) { | 1299 | for_each_event(call, start, end) { |
1300 | __trace_add_event_call(call, mod, | 1300 | __trace_add_event_call(*call, mod, |
1301 | &file_ops->id, &file_ops->enable, | 1301 | &file_ops->id, &file_ops->enable, |
1302 | &file_ops->filter, &file_ops->format); | 1302 | &file_ops->filter, &file_ops->format); |
1303 | } | 1303 | } |
@@ -1367,8 +1367,8 @@ static struct notifier_block trace_module_nb = { | |||
1367 | .priority = 0, | 1367 | .priority = 0, |
1368 | }; | 1368 | }; |
1369 | 1369 | ||
1370 | extern struct ftrace_event_call __start_ftrace_events[]; | 1370 | extern struct ftrace_event_call *__start_ftrace_events[]; |
1371 | extern struct ftrace_event_call __stop_ftrace_events[]; | 1371 | extern struct ftrace_event_call *__stop_ftrace_events[]; |
1372 | 1372 | ||
1373 | static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; | 1373 | static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata; |
1374 | 1374 | ||
@@ -1384,7 +1384,7 @@ __setup("trace_event=", setup_trace_event); | |||
1384 | 1384 | ||
1385 | static __init int event_trace_init(void) | 1385 | static __init int event_trace_init(void) |
1386 | { | 1386 | { |
1387 | struct ftrace_event_call *call; | 1387 | struct ftrace_event_call **call; |
1388 | struct dentry *d_tracer; | 1388 | struct dentry *d_tracer; |
1389 | struct dentry *entry; | 1389 | struct dentry *entry; |
1390 | struct dentry *d_events; | 1390 | struct dentry *d_events; |
@@ -1430,7 +1430,7 @@ static __init int event_trace_init(void) | |||
1430 | pr_warning("tracing: Failed to allocate common fields"); | 1430 | pr_warning("tracing: Failed to allocate common fields"); |
1431 | 1431 | ||
1432 | for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { | 1432 | for_each_event(call, __start_ftrace_events, __stop_ftrace_events) { |
1433 | __trace_add_event_call(call, NULL, &ftrace_event_id_fops, | 1433 | __trace_add_event_call(*call, NULL, &ftrace_event_id_fops, |
1434 | &ftrace_enable_fops, | 1434 | &ftrace_enable_fops, |
1435 | &ftrace_event_filter_fops, | 1435 | &ftrace_event_filter_fops, |
1436 | &ftrace_event_format_fops); | 1436 | &ftrace_event_format_fops); |
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4b74d71705c0..bbeec31e0ae3 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
@@ -161,13 +161,13 @@ struct ftrace_event_class event_class_ftrace_##call = { \ | |||
161 | .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ | 161 | .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\ |
162 | }; \ | 162 | }; \ |
163 | \ | 163 | \ |
164 | struct ftrace_event_call __used \ | 164 | struct ftrace_event_call __used event_##call = { \ |
165 | __attribute__((__aligned__(4))) \ | ||
166 | __attribute__((section("_ftrace_events"))) event_##call = { \ | ||
167 | .name = #call, \ | 165 | .name = #call, \ |
168 | .event.type = etype, \ | 166 | .event.type = etype, \ |
169 | .class = &event_class_ftrace_##call, \ | 167 | .class = &event_class_ftrace_##call, \ |
170 | .print_fmt = print, \ | 168 | .print_fmt = print, \ |
171 | }; \ | 169 | }; \ |
170 | struct ftrace_event_call __used \ | ||
171 | __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call; | ||
172 | 172 | ||
173 | #include "trace_entries.h" | 173 | #include "trace_entries.h" |
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 5cf8c602b880..92b6e1e12d98 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c | |||
@@ -453,14 +453,6 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1) | |||
453 | * Stubs: | 453 | * Stubs: |
454 | */ | 454 | */ |
455 | 455 | ||
456 | void early_boot_irqs_off(void) | ||
457 | { | ||
458 | } | ||
459 | |||
460 | void early_boot_irqs_on(void) | ||
461 | { | ||
462 | } | ||
463 | |||
464 | void trace_softirqs_on(unsigned long ip) | 456 | void trace_softirqs_on(unsigned long ip) |
465 | { | 457 | { |
466 | } | 458 | } |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index bac752f0cfb5..5c9fe08d2093 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -23,9 +23,6 @@ static int syscall_exit_register(struct ftrace_event_call *event, | |||
23 | static int syscall_enter_define_fields(struct ftrace_event_call *call); | 23 | static int syscall_enter_define_fields(struct ftrace_event_call *call); |
24 | static int syscall_exit_define_fields(struct ftrace_event_call *call); | 24 | static int syscall_exit_define_fields(struct ftrace_event_call *call); |
25 | 25 | ||
26 | /* All syscall exit events have the same fields */ | ||
27 | static LIST_HEAD(syscall_exit_fields); | ||
28 | |||
29 | static struct list_head * | 26 | static struct list_head * |
30 | syscall_get_enter_fields(struct ftrace_event_call *call) | 27 | syscall_get_enter_fields(struct ftrace_event_call *call) |
31 | { | 28 | { |
@@ -34,50 +31,45 @@ syscall_get_enter_fields(struct ftrace_event_call *call) | |||
34 | return &entry->enter_fields; | 31 | return &entry->enter_fields; |
35 | } | 32 | } |
36 | 33 | ||
37 | static struct list_head * | ||
38 | syscall_get_exit_fields(struct ftrace_event_call *call) | ||
39 | { | ||
40 | return &syscall_exit_fields; | ||
41 | } | ||
42 | |||
43 | struct trace_event_functions enter_syscall_print_funcs = { | 34 | struct trace_event_functions enter_syscall_print_funcs = { |
44 | .trace = print_syscall_enter, | 35 | .trace = print_syscall_enter, |
45 | }; | 36 | }; |
46 | 37 | ||
47 | struct trace_event_functions exit_syscall_print_funcs = { | 38 | struct trace_event_functions exit_syscall_print_funcs = { |
48 | .trace = print_syscall_exit, | 39 | .trace = print_syscall_exit, |
49 | }; | 40 | }; |
50 | 41 | ||
51 | struct ftrace_event_class event_class_syscall_enter = { | 42 | struct ftrace_event_class event_class_syscall_enter = { |
52 | .system = "syscalls", | 43 | .system = "syscalls", |
53 | .reg = syscall_enter_register, | 44 | .reg = syscall_enter_register, |
54 | .define_fields = syscall_enter_define_fields, | 45 | .define_fields = syscall_enter_define_fields, |
55 | .get_fields = syscall_get_enter_fields, | 46 | .get_fields = syscall_get_enter_fields, |
56 | .raw_init = init_syscall_trace, | 47 | .raw_init = init_syscall_trace, |
57 | }; | 48 | }; |
58 | 49 | ||
59 | struct ftrace_event_class event_class_syscall_exit = { | 50 | struct ftrace_event_class event_class_syscall_exit = { |
60 | .system = "syscalls", | 51 | .system = "syscalls", |
61 | .reg = syscall_exit_register, | 52 | .reg = syscall_exit_register, |
62 | .define_fields = syscall_exit_define_fields, | 53 | .define_fields = syscall_exit_define_fields, |
63 | .get_fields = syscall_get_exit_fields, | 54 | .fields = LIST_HEAD_INIT(event_class_syscall_exit.fields), |
64 | .raw_init = init_syscall_trace, | 55 | .raw_init = init_syscall_trace, |
65 | }; | 56 | }; |
66 | 57 | ||
67 | extern unsigned long __start_syscalls_metadata[]; | 58 | extern struct syscall_metadata *__start_syscalls_metadata[]; |
68 | extern unsigned long __stop_syscalls_metadata[]; | 59 | extern struct syscall_metadata *__stop_syscalls_metadata[]; |
69 | 60 | ||
70 | static struct syscall_metadata **syscalls_metadata; | 61 | static struct syscall_metadata **syscalls_metadata; |
71 | 62 | ||
72 | static struct syscall_metadata *find_syscall_meta(unsigned long syscall) | 63 | static __init struct syscall_metadata * |
64 | find_syscall_meta(unsigned long syscall) | ||
73 | { | 65 | { |
74 | struct syscall_metadata *start; | 66 | struct syscall_metadata **start; |
75 | struct syscall_metadata *stop; | 67 | struct syscall_metadata **stop; |
76 | char str[KSYM_SYMBOL_LEN]; | 68 | char str[KSYM_SYMBOL_LEN]; |
77 | 69 | ||
78 | 70 | ||
79 | start = (struct syscall_metadata *)__start_syscalls_metadata; | 71 | start = __start_syscalls_metadata; |
80 | stop = (struct syscall_metadata *)__stop_syscalls_metadata; | 72 | stop = __stop_syscalls_metadata; |
81 | kallsyms_lookup(syscall, NULL, NULL, NULL, str); | 73 | kallsyms_lookup(syscall, NULL, NULL, NULL, str); |
82 | 74 | ||
83 | for ( ; start < stop; start++) { | 75 | for ( ; start < stop; start++) { |
@@ -87,8 +79,8 @@ static struct syscall_metadata *find_syscall_meta(unsigned long syscall) | |||
87 | * with "SyS" instead of "sys", leading to an unwanted | 79 | * with "SyS" instead of "sys", leading to an unwanted |
88 | * mismatch. | 80 | * mismatch. |
89 | */ | 81 | */ |
90 | if (start->name && !strcmp(start->name + 3, str + 3)) | 82 | if ((*start)->name && !strcmp((*start)->name + 3, str + 3)) |
91 | return start; | 83 | return *start; |
92 | } | 84 | } |
93 | return NULL; | 85 | return NULL; |
94 | } | 86 | } |
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index e95ee7f31d43..68187af4889e 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c | |||
@@ -27,8 +27,8 @@ | |||
27 | #include <linux/sched.h> | 27 | #include <linux/sched.h> |
28 | #include <linux/jump_label.h> | 28 | #include <linux/jump_label.h> |
29 | 29 | ||
30 | extern struct tracepoint __start___tracepoints[]; | 30 | extern struct tracepoint * const __start___tracepoints_ptrs[]; |
31 | extern struct tracepoint __stop___tracepoints[]; | 31 | extern struct tracepoint * const __stop___tracepoints_ptrs[]; |
32 | 32 | ||
33 | /* Set to 1 to enable tracepoint debug output */ | 33 | /* Set to 1 to enable tracepoint debug output */ |
34 | static const int tracepoint_debug; | 34 | static const int tracepoint_debug; |
@@ -298,10 +298,10 @@ static void disable_tracepoint(struct tracepoint *elem) | |||
298 | * | 298 | * |
299 | * Updates the probe callback corresponding to a range of tracepoints. | 299 | * Updates the probe callback corresponding to a range of tracepoints. |
300 | */ | 300 | */ |
301 | void | 301 | void tracepoint_update_probe_range(struct tracepoint * const *begin, |
302 | tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) | 302 | struct tracepoint * const *end) |
303 | { | 303 | { |
304 | struct tracepoint *iter; | 304 | struct tracepoint * const *iter; |
305 | struct tracepoint_entry *mark_entry; | 305 | struct tracepoint_entry *mark_entry; |
306 | 306 | ||
307 | if (!begin) | 307 | if (!begin) |
@@ -309,12 +309,12 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) | |||
309 | 309 | ||
310 | mutex_lock(&tracepoints_mutex); | 310 | mutex_lock(&tracepoints_mutex); |
311 | for (iter = begin; iter < end; iter++) { | 311 | for (iter = begin; iter < end; iter++) { |
312 | mark_entry = get_tracepoint(iter->name); | 312 | mark_entry = get_tracepoint((*iter)->name); |
313 | if (mark_entry) { | 313 | if (mark_entry) { |
314 | set_tracepoint(&mark_entry, iter, | 314 | set_tracepoint(&mark_entry, *iter, |
315 | !!mark_entry->refcount); | 315 | !!mark_entry->refcount); |
316 | } else { | 316 | } else { |
317 | disable_tracepoint(iter); | 317 | disable_tracepoint(*iter); |
318 | } | 318 | } |
319 | } | 319 | } |
320 | mutex_unlock(&tracepoints_mutex); | 320 | mutex_unlock(&tracepoints_mutex); |
@@ -326,8 +326,8 @@ tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) | |||
326 | static void tracepoint_update_probes(void) | 326 | static void tracepoint_update_probes(void) |
327 | { | 327 | { |
328 | /* Core kernel tracepoints */ | 328 | /* Core kernel tracepoints */ |
329 | tracepoint_update_probe_range(__start___tracepoints, | 329 | tracepoint_update_probe_range(__start___tracepoints_ptrs, |
330 | __stop___tracepoints); | 330 | __stop___tracepoints_ptrs); |
331 | /* tracepoints in modules. */ | 331 | /* tracepoints in modules. */ |
332 | module_update_tracepoints(); | 332 | module_update_tracepoints(); |
333 | } | 333 | } |
@@ -514,8 +514,8 @@ EXPORT_SYMBOL_GPL(tracepoint_probe_update_all); | |||
514 | * Will return the first tracepoint in the range if the input tracepoint is | 514 | * Will return the first tracepoint in the range if the input tracepoint is |
515 | * NULL. | 515 | * NULL. |
516 | */ | 516 | */ |
517 | int tracepoint_get_iter_range(struct tracepoint **tracepoint, | 517 | int tracepoint_get_iter_range(struct tracepoint * const **tracepoint, |
518 | struct tracepoint *begin, struct tracepoint *end) | 518 | struct tracepoint * const *begin, struct tracepoint * const *end) |
519 | { | 519 | { |
520 | if (!*tracepoint && begin != end) { | 520 | if (!*tracepoint && begin != end) { |
521 | *tracepoint = begin; | 521 | *tracepoint = begin; |
@@ -534,7 +534,8 @@ static void tracepoint_get_iter(struct tracepoint_iter *iter) | |||
534 | /* Core kernel tracepoints */ | 534 | /* Core kernel tracepoints */ |
535 | if (!iter->module) { | 535 | if (!iter->module) { |
536 | found = tracepoint_get_iter_range(&iter->tracepoint, | 536 | found = tracepoint_get_iter_range(&iter->tracepoint, |
537 | __start___tracepoints, __stop___tracepoints); | 537 | __start___tracepoints_ptrs, |
538 | __stop___tracepoints_ptrs); | ||
538 | if (found) | 539 | if (found) |
539 | goto end; | 540 | goto end; |
540 | } | 541 | } |
@@ -585,8 +586,8 @@ int tracepoint_module_notify(struct notifier_block *self, | |||
585 | switch (val) { | 586 | switch (val) { |
586 | case MODULE_STATE_COMING: | 587 | case MODULE_STATE_COMING: |
587 | case MODULE_STATE_GOING: | 588 | case MODULE_STATE_GOING: |
588 | tracepoint_update_probe_range(mod->tracepoints, | 589 | tracepoint_update_probe_range(mod->tracepoints_ptrs, |
589 | mod->tracepoints + mod->num_tracepoints); | 590 | mod->tracepoints_ptrs + mod->num_tracepoints); |
590 | break; | 591 | break; |
591 | } | 592 | } |
592 | return 0; | 593 | return 0; |
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index d7ebdf4cea98..f37f974aa81b 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -27,7 +27,7 @@ | |||
27 | #include <asm/irq_regs.h> | 27 | #include <asm/irq_regs.h> |
28 | #include <linux/perf_event.h> | 28 | #include <linux/perf_event.h> |
29 | 29 | ||
30 | int watchdog_enabled; | 30 | int watchdog_enabled = 1; |
31 | int __read_mostly softlockup_thresh = 60; | 31 | int __read_mostly softlockup_thresh = 60; |
32 | 32 | ||
33 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); | 33 | static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); |
@@ -43,9 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); | |||
43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | 43 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); |
44 | #endif | 44 | #endif |
45 | 45 | ||
46 | static int no_watchdog; | ||
47 | |||
48 | |||
49 | /* boot commands */ | 46 | /* boot commands */ |
50 | /* | 47 | /* |
51 | * Should we panic when a soft-lockup or hard-lockup occurs: | 48 | * Should we panic when a soft-lockup or hard-lockup occurs: |
@@ -58,7 +55,7 @@ static int __init hardlockup_panic_setup(char *str) | |||
58 | if (!strncmp(str, "panic", 5)) | 55 | if (!strncmp(str, "panic", 5)) |
59 | hardlockup_panic = 1; | 56 | hardlockup_panic = 1; |
60 | else if (!strncmp(str, "0", 1)) | 57 | else if (!strncmp(str, "0", 1)) |
61 | no_watchdog = 1; | 58 | watchdog_enabled = 0; |
62 | return 1; | 59 | return 1; |
63 | } | 60 | } |
64 | __setup("nmi_watchdog=", hardlockup_panic_setup); | 61 | __setup("nmi_watchdog=", hardlockup_panic_setup); |
@@ -77,7 +74,7 @@ __setup("softlockup_panic=", softlockup_panic_setup); | |||
77 | 74 | ||
78 | static int __init nowatchdog_setup(char *str) | 75 | static int __init nowatchdog_setup(char *str) |
79 | { | 76 | { |
80 | no_watchdog = 1; | 77 | watchdog_enabled = 0; |
81 | return 1; | 78 | return 1; |
82 | } | 79 | } |
83 | __setup("nowatchdog", nowatchdog_setup); | 80 | __setup("nowatchdog", nowatchdog_setup); |
@@ -85,7 +82,7 @@ __setup("nowatchdog", nowatchdog_setup); | |||
85 | /* deprecated */ | 82 | /* deprecated */ |
86 | static int __init nosoftlockup_setup(char *str) | 83 | static int __init nosoftlockup_setup(char *str) |
87 | { | 84 | { |
88 | no_watchdog = 1; | 85 | watchdog_enabled = 0; |
89 | return 1; | 86 | return 1; |
90 | } | 87 | } |
91 | __setup("nosoftlockup", nosoftlockup_setup); | 88 | __setup("nosoftlockup", nosoftlockup_setup); |
@@ -432,9 +429,6 @@ static int watchdog_enable(int cpu) | |||
432 | wake_up_process(p); | 429 | wake_up_process(p); |
433 | } | 430 | } |
434 | 431 | ||
435 | /* if any cpu succeeds, watchdog is considered enabled for the system */ | ||
436 | watchdog_enabled = 1; | ||
437 | |||
438 | return 0; | 432 | return 0; |
439 | } | 433 | } |
440 | 434 | ||
@@ -462,12 +456,16 @@ static void watchdog_disable(int cpu) | |||
462 | static void watchdog_enable_all_cpus(void) | 456 | static void watchdog_enable_all_cpus(void) |
463 | { | 457 | { |
464 | int cpu; | 458 | int cpu; |
465 | int result = 0; | 459 | |
460 | watchdog_enabled = 0; | ||
466 | 461 | ||
467 | for_each_online_cpu(cpu) | 462 | for_each_online_cpu(cpu) |
468 | result += watchdog_enable(cpu); | 463 | if (!watchdog_enable(cpu)) |
464 | /* if any cpu succeeds, watchdog is considered | ||
465 | enabled for the system */ | ||
466 | watchdog_enabled = 1; | ||
469 | 467 | ||
470 | if (result) | 468 | if (!watchdog_enabled) |
471 | printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); | 469 | printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); |
472 | 470 | ||
473 | } | 471 | } |
@@ -476,9 +474,6 @@ static void watchdog_disable_all_cpus(void) | |||
476 | { | 474 | { |
477 | int cpu; | 475 | int cpu; |
478 | 476 | ||
479 | if (no_watchdog) | ||
480 | return; | ||
481 | |||
482 | for_each_online_cpu(cpu) | 477 | for_each_online_cpu(cpu) |
483 | watchdog_disable(cpu); | 478 | watchdog_disable(cpu); |
484 | 479 | ||
@@ -498,10 +493,12 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write, | |||
498 | { | 493 | { |
499 | proc_dointvec(table, write, buffer, length, ppos); | 494 | proc_dointvec(table, write, buffer, length, ppos); |
500 | 495 | ||
501 | if (watchdog_enabled) | 496 | if (write) { |
502 | watchdog_enable_all_cpus(); | 497 | if (watchdog_enabled) |
503 | else | 498 | watchdog_enable_all_cpus(); |
504 | watchdog_disable_all_cpus(); | 499 | else |
500 | watchdog_disable_all_cpus(); | ||
501 | } | ||
505 | return 0; | 502 | return 0; |
506 | } | 503 | } |
507 | 504 | ||
@@ -530,7 +527,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
530 | break; | 527 | break; |
531 | case CPU_ONLINE: | 528 | case CPU_ONLINE: |
532 | case CPU_ONLINE_FROZEN: | 529 | case CPU_ONLINE_FROZEN: |
533 | err = watchdog_enable(hotcpu); | 530 | if (watchdog_enabled) |
531 | err = watchdog_enable(hotcpu); | ||
534 | break; | 532 | break; |
535 | #ifdef CONFIG_HOTPLUG_CPU | 533 | #ifdef CONFIG_HOTPLUG_CPU |
536 | case CPU_UP_CANCELED: | 534 | case CPU_UP_CANCELED: |
@@ -555,9 +553,6 @@ void __init lockup_detector_init(void) | |||
555 | void *cpu = (void *)(long)smp_processor_id(); | 553 | void *cpu = (void *)(long)smp_processor_id(); |
556 | int err; | 554 | int err; |
557 | 555 | ||
558 | if (no_watchdog) | ||
559 | return; | ||
560 | |||
561 | err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); | 556 | err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); |
562 | WARN_ON(notifier_to_errno(err)); | 557 | WARN_ON(notifier_to_errno(err)); |
563 | 558 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 8ee6ec82f88a..11869faa6819 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -768,7 +768,11 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) | |||
768 | 768 | ||
769 | worker->flags &= ~flags; | 769 | worker->flags &= ~flags; |
770 | 770 | ||
771 | /* if transitioning out of NOT_RUNNING, increment nr_running */ | 771 | /* |
772 | * If transitioning out of NOT_RUNNING, increment nr_running. Note | ||
773 | * that the nested NOT_RUNNING is not a noop. NOT_RUNNING is mask | ||
774 | * of multiple flags, not a single flag. | ||
775 | */ | ||
772 | if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) | 776 | if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) |
773 | if (!(worker->flags & WORKER_NOT_RUNNING)) | 777 | if (!(worker->flags & WORKER_NOT_RUNNING)) |
774 | atomic_inc(get_gcwq_nr_running(gcwq->cpu)); | 778 | atomic_inc(get_gcwq_nr_running(gcwq->cpu)); |
@@ -1840,7 +1844,7 @@ __acquires(&gcwq->lock) | |||
1840 | spin_unlock_irq(&gcwq->lock); | 1844 | spin_unlock_irq(&gcwq->lock); |
1841 | 1845 | ||
1842 | work_clear_pending(work); | 1846 | work_clear_pending(work); |
1843 | lock_map_acquire(&cwq->wq->lockdep_map); | 1847 | lock_map_acquire_read(&cwq->wq->lockdep_map); |
1844 | lock_map_acquire(&lockdep_map); | 1848 | lock_map_acquire(&lockdep_map); |
1845 | trace_workqueue_execute_start(work); | 1849 | trace_workqueue_execute_start(work); |
1846 | f(work); | 1850 | f(work); |
@@ -2384,8 +2388,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, | |||
2384 | insert_wq_barrier(cwq, barr, work, worker); | 2388 | insert_wq_barrier(cwq, barr, work, worker); |
2385 | spin_unlock_irq(&gcwq->lock); | 2389 | spin_unlock_irq(&gcwq->lock); |
2386 | 2390 | ||
2387 | lock_map_acquire(&cwq->wq->lockdep_map); | 2391 | /* |
2392 | * If @max_active is 1 or rescuer is in use, flushing another work | ||
2393 | * item on the same workqueue may lead to deadlock. Make sure the | ||
2394 | * flusher is not running on the same workqueue by verifying write | ||
2395 | * access. | ||
2396 | */ | ||
2397 | if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER) | ||
2398 | lock_map_acquire(&cwq->wq->lockdep_map); | ||
2399 | else | ||
2400 | lock_map_acquire_read(&cwq->wq->lockdep_map); | ||
2388 | lock_map_release(&cwq->wq->lockdep_map); | 2401 | lock_map_release(&cwq->wq->lockdep_map); |
2402 | |||
2389 | return true; | 2403 | return true; |
2390 | already_gone: | 2404 | already_gone: |
2391 | spin_unlock_irq(&gcwq->lock); | 2405 | spin_unlock_irq(&gcwq->lock); |