Diffstat (limited to 'kernel')
-rw-r--r--  kernel/acct.c                    |  17
-rw-r--r--  kernel/cgroup.c                  |  62
-rw-r--r--  kernel/fork.c                    |   1
-rw-r--r--  kernel/kexec.c                   |   6
-rw-r--r--  kernel/lockdep.c                 |  84
-rw-r--r--  kernel/lockdep_internals.h       |  72
-rw-r--r--  kernel/lockdep_proc.c            |  58
-rw-r--r--  kernel/perf_event.c              | 389
-rw-r--r--  kernel/profile.c                 |   4
-rw-r--r--  kernel/ptrace.c                  |  11
-rw-r--r--  kernel/rcupdate.c                |  30
-rw-r--r--  kernel/rcutiny.c                 |  35
-rw-r--r--  kernel/rcutiny_plugin.h          |  39
-rw-r--r--  kernel/rcutorture.c              |   2
-rw-r--r--  kernel/rcutree.c                 | 131
-rw-r--r--  kernel/rcutree.h                 |   2
-rw-r--r--  kernel/rcutree_plugin.h          |  69
-rw-r--r--  kernel/rcutree_trace.c           |   4
-rw-r--r--  kernel/sched.c                   |   2
-rw-r--r--  kernel/sched_debug.c             |   2
-rw-r--r--  kernel/softirq.c                 |   2
-rw-r--r--  kernel/stop_machine.c            |   5
-rw-r--r--  kernel/trace/trace_event_perf.c  | 190
-rw-r--r--  kernel/trace/trace_kprobe.c      |  17
-rw-r--r--  kernel/trace/trace_syscalls.c    |  17
25 files changed, 762 insertions, 489 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 24f8c81fc48d..e4c0e1fee9b0 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -353,17 +353,18 @@ restart:
 
 void acct_exit_ns(struct pid_namespace *ns)
 {
-	struct bsd_acct_struct *acct;
+	struct bsd_acct_struct *acct = ns->bacct;
 
-	spin_lock(&acct_lock);
-	acct = ns->bacct;
-	if (acct != NULL) {
-		if (acct->file != NULL)
-			acct_file_reopen(acct, NULL, NULL);
+	if (acct == NULL)
+		return;
 
-		kfree(acct);
-	}
+	del_timer_sync(&acct->timer);
+	spin_lock(&acct_lock);
+	if (acct->file != NULL)
+		acct_file_reopen(acct, NULL, NULL);
 	spin_unlock(&acct_lock);
+
+	kfree(acct);
 }
 
 /*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4a07d057a265..e9ec642932ee 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1646,7 +1646,9 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 {
 	char *start;
-	struct dentry *dentry = rcu_dereference(cgrp->dentry);
+	struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
+						      rcu_read_lock_held() ||
+						      cgroup_lock_is_held());
 
 	if (!dentry || cgrp == dummytop) {
 		/*
@@ -1662,13 +1664,17 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
 	*--start = '\0';
 	for (;;) {
 		int len = dentry->d_name.len;
+
 		if ((start -= len) < buf)
 			return -ENAMETOOLONG;
-		memcpy(start, cgrp->dentry->d_name.name, len);
+		memcpy(start, dentry->d_name.name, len);
 		cgrp = cgrp->parent;
 		if (!cgrp)
 			break;
-		dentry = rcu_dereference(cgrp->dentry);
+
+		dentry = rcu_dereference_check(cgrp->dentry,
+					       rcu_read_lock_held() ||
+					       cgroup_lock_is_held());
 		if (!cgrp->parent)
 			continue;
 		if (--start < buf)
@@ -4429,7 +4435,15 @@ __setup("cgroup_disable=", cgroup_disable);
  */
 unsigned short css_id(struct cgroup_subsys_state *css)
 {
-	struct css_id *cssid = rcu_dereference(css->id);
+	struct css_id *cssid;
+
+	/*
+	 * This css_id() can return correct value when somone has refcnt
+	 * on this or this is under rcu_read_lock(). Once css->id is allocated,
+	 * it's unchanged until freed.
+	 */
+	cssid = rcu_dereference_check(css->id,
+			rcu_read_lock_held() || atomic_read(&css->refcnt));
 
 	if (cssid)
 		return cssid->id;
@@ -4439,7 +4453,10 @@ EXPORT_SYMBOL_GPL(css_id);
 
 unsigned short css_depth(struct cgroup_subsys_state *css)
 {
-	struct css_id *cssid = rcu_dereference(css->id);
+	struct css_id *cssid;
+
+	cssid = rcu_dereference_check(css->id,
+			rcu_read_lock_held() || atomic_read(&css->refcnt));
 
 	if (cssid)
 		return cssid->depth;
@@ -4447,15 +4464,36 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
 }
 EXPORT_SYMBOL_GPL(css_depth);
 
+/**
+ * css_is_ancestor - test "root" css is an ancestor of "child"
+ * @child: the css to be tested.
+ * @root: the css supporsed to be an ancestor of the child.
+ *
+ * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
+ * this function reads css->id, this use rcu_dereference() and rcu_read_lock().
+ * But, considering usual usage, the csses should be valid objects after test.
+ * Assuming that the caller will do some action to the child if this returns
+ * returns true, the caller must take "child";s reference count.
+ * If "child" is valid object and this returns true, "root" is valid, too.
+ */
+
 bool css_is_ancestor(struct cgroup_subsys_state *child,
 		    const struct cgroup_subsys_state *root)
 {
-	struct css_id *child_id = rcu_dereference(child->id);
-	struct css_id *root_id = rcu_dereference(root->id);
+	struct css_id *child_id;
+	struct css_id *root_id;
+	bool ret = true;
 
-	if (!child_id || !root_id || (child_id->depth < root_id->depth))
-		return false;
-	return child_id->stack[root_id->depth] == root_id->id;
+	rcu_read_lock();
+	child_id = rcu_dereference(child->id);
+	root_id = rcu_dereference(root->id);
+	if (!child_id
+	    || !root_id
+	    || (child_id->depth < root_id->depth)
+	    || (child_id->stack[root_id->depth] != root_id->id))
+		ret = false;
+	rcu_read_unlock();
+	return ret;
 }
 
 static void __free_css_id_cb(struct rcu_head *head)
@@ -4555,13 +4593,13 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
 {
 	int subsys_id, i, depth = 0;
 	struct cgroup_subsys_state *parent_css, *child_css;
-	struct css_id *child_id, *parent_id = NULL;
+	struct css_id *child_id, *parent_id;
 
 	subsys_id = ss->subsys_id;
 	parent_css = parent->subsys[subsys_id];
 	child_css = child->subsys[subsys_id];
-	depth = css_depth(parent_css) + 1;
 	parent_id = parent_css->id;
+	depth = parent_id->depth;
 
 	child_id = get_new_cssid(ss, depth);
 	if (IS_ERR(child_id))
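The cgroup.c hunks above repeatedly swap a bare rcu_dereference() for rcu_dereference_check() with an explicit lockdep condition. As a side note, a minimal, hypothetical sketch of that pattern follows; struct foo, my_ptr, my_lock and do_something() are placeholders invented for illustration and are not part of this patch:

	#include <linux/rcupdate.h>
	#include <linux/mutex.h>

	/* Hypothetical shared data, not from this patch. */
	static DEFINE_MUTEX(my_lock);
	static struct foo *my_ptr;

	static void reader(void)
	{
		struct foo *p;

		rcu_read_lock();
		/*
		 * Document when the dereference is legal: either inside
		 * rcu_read_lock() or while holding my_lock. With
		 * CONFIG_PROVE_RCU, lockdep warns if neither holds.
		 */
		p = rcu_dereference_check(my_ptr,
					  rcu_read_lock_held() ||
					  lockdep_is_held(&my_lock));
		if (p)
			do_something(p);
		rcu_read_unlock();
	}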
diff --git a/kernel/fork.c b/kernel/fork.c
index 5d3592deaf71..4d57d9e3a6e9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1111,7 +1111,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->memcg_batch.do_batch = 0;
 	p->memcg_batch.memcg = NULL;
 #endif
-	p->stack_start = stack_start;
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 87ebe8adc474..474a84715eac 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1134,11 +1134,9 @@ int crash_shrink_memory(unsigned long new_size)
 
 	free_reserved_phys_range(end, crashk_res.end);
 
-	if (start == end) {
-		crashk_res.end = end;
+	if (start == end)
 		release_resource(&crashk_res);
-	} else
-		crashk_res.end = end - 1;
+	crashk_res.end = end - 1;
 
 unlock:
 	mutex_unlock(&kexec_mutex);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e9c759f06c1d..ec21304856d1 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -431,20 +431,7 @@ static struct stack_trace lockdep_init_trace = {
 /*
  * Various lockdep statistics:
  */
-atomic_t chain_lookup_hits;
-atomic_t chain_lookup_misses;
-atomic_t hardirqs_on_events;
-atomic_t hardirqs_off_events;
-atomic_t redundant_hardirqs_on;
-atomic_t redundant_hardirqs_off;
-atomic_t softirqs_on_events;
-atomic_t softirqs_off_events;
-atomic_t redundant_softirqs_on;
-atomic_t redundant_softirqs_off;
-atomic_t nr_unused_locks;
-atomic_t nr_cyclic_checks;
-atomic_t nr_find_usage_forwards_checks;
-atomic_t nr_find_usage_backwards_checks;
+DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
 #endif
 
 /*
@@ -748,7 +735,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 		return NULL;
 	}
 	class = lock_classes + nr_lock_classes++;
-	debug_atomic_inc(&nr_unused_locks);
+	debug_atomic_inc(nr_unused_locks);
 	class->key = key;
 	class->name = lock->name;
 	class->subclass = subclass;
@@ -818,7 +805,8 @@ static struct lock_list *alloc_list_entry(void)
  * Add a new dependency to the head of the list:
  */
 static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
-			    struct list_head *head, unsigned long ip, int distance)
+			    struct list_head *head, unsigned long ip,
+			    int distance, struct stack_trace *trace)
 {
 	struct lock_list *entry;
 	/*
@@ -829,11 +817,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
 	if (!entry)
 		return 0;
 
-	if (!save_trace(&entry->trace))
-		return 0;
-
 	entry->class = this;
 	entry->distance = distance;
+	entry->trace = *trace;
 	/*
 	 * Since we never remove from the dependency list, the list can
 	 * be walked lockless by other CPUs, it's only allocation
@@ -1205,7 +1191,7 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
 {
 	int result;
 
-	debug_atomic_inc(&nr_cyclic_checks);
+	debug_atomic_inc(nr_cyclic_checks);
 
 	result = __bfs_forwards(root, target, class_equal, target_entry);
 
@@ -1242,7 +1228,7 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
 {
 	int result;
 
-	debug_atomic_inc(&nr_find_usage_forwards_checks);
+	debug_atomic_inc(nr_find_usage_forwards_checks);
 
 	result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
 
@@ -1265,7 +1251,7 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
 {
 	int result;
 
-	debug_atomic_inc(&nr_find_usage_backwards_checks);
+	debug_atomic_inc(nr_find_usage_backwards_checks);
 
 	result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
 
@@ -1635,12 +1621,20 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
  */
 static int
 check_prev_add(struct task_struct *curr, struct held_lock *prev,
-	       struct held_lock *next, int distance)
+	       struct held_lock *next, int distance, int trylock_loop)
 {
 	struct lock_list *entry;
 	int ret;
 	struct lock_list this;
 	struct lock_list *uninitialized_var(target_entry);
+	/*
+	 * Static variable, serialized by the graph_lock().
+	 *
+	 * We use this static variable to save the stack trace in case
+	 * we call into this function multiple times due to encountering
+	 * trylocks in the held lock stack.
+	 */
+	static struct stack_trace trace;
 
 	/*
 	 * Prove that the new <prev> -> <next> dependency would not
@@ -1688,20 +1682,23 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
 		}
 	}
 
+	if (!trylock_loop && !save_trace(&trace))
+		return 0;
+
 	/*
 	 * Ok, all validations passed, add the new lock
 	 * to the previous lock's dependency list:
 	 */
 	ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
			       &hlock_class(prev)->locks_after,
-			       next->acquire_ip, distance);
+			       next->acquire_ip, distance, &trace);
 
 	if (!ret)
 		return 0;
 
 	ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
			       &hlock_class(next)->locks_before,
-			       next->acquire_ip, distance);
+			       next->acquire_ip, distance, &trace);
 	if (!ret)
 		return 0;
 
@@ -1731,6 +1728,7 @@ static int
 check_prevs_add(struct task_struct *curr, struct held_lock *next)
 {
 	int depth = curr->lockdep_depth;
+	int trylock_loop = 0;
 	struct held_lock *hlock;
 
 	/*
@@ -1756,7 +1754,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 		 * added:
 		 */
 		if (hlock->read != 2) {
-			if (!check_prev_add(curr, hlock, next, distance))
+			if (!check_prev_add(curr, hlock, next,
+						distance, trylock_loop))
 				return 0;
 			/*
 			 * Stop after the first non-trylock entry,
@@ -1779,6 +1778,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
 		if (curr->held_locks[depth].irq_context !=
 		    curr->held_locks[depth-1].irq_context)
 			break;
+		trylock_loop = 1;
 	}
 	return 1;
 out_bug:
@@ -1825,7 +1825,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
 	list_for_each_entry(chain, hash_head, entry) {
 		if (chain->chain_key == chain_key) {
 cache_hit:
-			debug_atomic_inc(&chain_lookup_hits);
+			debug_atomic_inc(chain_lookup_hits);
 			if (very_verbose(class))
 				printk("\nhash chain already cached, key: "
 					"%016Lx tail class: [%p] %s\n",
@@ -1890,7 +1890,7 @@ cache_hit:
 		chain_hlocks[chain->base + j] = class - lock_classes;
 	}
 	list_add_tail_rcu(&chain->entry, hash_head);
-	debug_atomic_inc(&chain_lookup_misses);
+	debug_atomic_inc(chain_lookup_misses);
 	inc_chains();
 
 	return 1;
@@ -2311,7 +2311,12 @@ void trace_hardirqs_on_caller(unsigned long ip)
 		return;
 
 	if (unlikely(curr->hardirqs_enabled)) {
-		debug_atomic_inc(&redundant_hardirqs_on);
+		/*
+		 * Neither irq nor preemption are disabled here
+		 * so this is racy by nature but loosing one hit
+		 * in a stat is not a big deal.
+		 */
+		__debug_atomic_inc(redundant_hardirqs_on);
 		return;
 	}
 	/* we'll do an OFF -> ON transition: */
@@ -2338,7 +2343,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
 
 	curr->hardirq_enable_ip = ip;
 	curr->hardirq_enable_event = ++curr->irq_events;
-	debug_atomic_inc(&hardirqs_on_events);
+	debug_atomic_inc(hardirqs_on_events);
 }
 EXPORT_SYMBOL(trace_hardirqs_on_caller);
 
@@ -2370,9 +2375,9 @@ void trace_hardirqs_off_caller(unsigned long ip)
 		curr->hardirqs_enabled = 0;
 		curr->hardirq_disable_ip = ip;
 		curr->hardirq_disable_event = ++curr->irq_events;
-		debug_atomic_inc(&hardirqs_off_events);
+		debug_atomic_inc(hardirqs_off_events);
 	} else
-		debug_atomic_inc(&redundant_hardirqs_off);
+		debug_atomic_inc(redundant_hardirqs_off);
 }
 EXPORT_SYMBOL(trace_hardirqs_off_caller);
 
@@ -2396,7 +2401,7 @@ void trace_softirqs_on(unsigned long ip)
 		return;
 
 	if (curr->softirqs_enabled) {
-		debug_atomic_inc(&redundant_softirqs_on);
+		debug_atomic_inc(redundant_softirqs_on);
 		return;
 	}
 
@@ -2406,7 +2411,7 @@ void trace_softirqs_on(unsigned long ip)
 	curr->softirqs_enabled = 1;
 	curr->softirq_enable_ip = ip;
 	curr->softirq_enable_event = ++curr->irq_events;
-	debug_atomic_inc(&softirqs_on_events);
+	debug_atomic_inc(softirqs_on_events);
 	/*
 	 * We are going to turn softirqs on, so set the
 	 * usage bit for all held locks, if hardirqs are
@@ -2436,10 +2441,10 @@ void trace_softirqs_off(unsigned long ip)
 		curr->softirqs_enabled = 0;
 		curr->softirq_disable_ip = ip;
 		curr->softirq_disable_event = ++curr->irq_events;
-		debug_atomic_inc(&softirqs_off_events);
+		debug_atomic_inc(softirqs_off_events);
 		DEBUG_LOCKS_WARN_ON(!softirq_count());
 	} else
-		debug_atomic_inc(&redundant_softirqs_off);
+		debug_atomic_inc(redundant_softirqs_off);
 }
 
 static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
@@ -2644,7 +2649,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 			return 0;
 		break;
 	case LOCK_USED:
-		debug_atomic_dec(&nr_unused_locks);
+		debug_atomic_dec(nr_unused_locks);
 		break;
 	default:
 		if (!debug_locks_off_graph_unlock())
@@ -2750,7 +2755,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		if (!class)
 			return 0;
 	}
-	debug_atomic_inc((atomic_t *)&class->ops);
+	atomic_inc((atomic_t *)&class->ops);
 	if (very_verbose(class)) {
 		printk("\nacquire class [%p] %s", class->key, class->name);
 		if (class->name_version > 1)
@@ -3801,8 +3806,11 @@ void lockdep_rcu_dereference(const char *file, const int line)
 {
 	struct task_struct *curr = current;
 
+#ifndef CONFIG_PROVE_RCU_REPEATEDLY
 	if (!debug_locks_off())
 		return;
+#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
+	/* Note: the following can be executed concurrently, so be careful. */
 	printk("\n===================================================\n");
 	printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n");
 	printk( "---------------------------------------------------\n");
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index a2ee95ad1313..4f560cfedc8f 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -110,30 +110,60 @@ lockdep_count_backward_deps(struct lock_class *class)
 #endif
 
 #ifdef CONFIG_DEBUG_LOCKDEP
+
+#include <asm/local.h>
 /*
- * Various lockdep statistics:
+ * Various lockdep statistics.
+ * We want them per cpu as they are often accessed in fast path
+ * and we want to avoid too much cache bouncing.
  */
-extern atomic_t chain_lookup_hits;
-extern atomic_t chain_lookup_misses;
-extern atomic_t hardirqs_on_events;
-extern atomic_t hardirqs_off_events;
-extern atomic_t redundant_hardirqs_on;
-extern atomic_t redundant_hardirqs_off;
-extern atomic_t softirqs_on_events;
-extern atomic_t softirqs_off_events;
-extern atomic_t redundant_softirqs_on;
-extern atomic_t redundant_softirqs_off;
-extern atomic_t nr_unused_locks;
-extern atomic_t nr_cyclic_checks;
-extern atomic_t nr_cyclic_check_recursions;
-extern atomic_t nr_find_usage_forwards_checks;
-extern atomic_t nr_find_usage_forwards_recursions;
-extern atomic_t nr_find_usage_backwards_checks;
-extern atomic_t nr_find_usage_backwards_recursions;
-# define debug_atomic_inc(ptr)		atomic_inc(ptr)
-# define debug_atomic_dec(ptr)		atomic_dec(ptr)
-# define debug_atomic_read(ptr)		atomic_read(ptr)
+struct lockdep_stats {
+	int	chain_lookup_hits;
+	int	chain_lookup_misses;
+	int	hardirqs_on_events;
+	int	hardirqs_off_events;
+	int	redundant_hardirqs_on;
+	int	redundant_hardirqs_off;
+	int	softirqs_on_events;
+	int	softirqs_off_events;
+	int	redundant_softirqs_on;
+	int	redundant_softirqs_off;
+	int	nr_unused_locks;
+	int	nr_cyclic_checks;
+	int	nr_cyclic_check_recursions;
+	int	nr_find_usage_forwards_checks;
+	int	nr_find_usage_forwards_recursions;
+	int	nr_find_usage_backwards_checks;
+	int	nr_find_usage_backwards_recursions;
+};
+
+DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
+
+#define __debug_atomic_inc(ptr)					\
+	this_cpu_inc(lockdep_stats.ptr);
+
+#define debug_atomic_inc(ptr)			{		\
+	WARN_ON_ONCE(!irqs_disabled());				\
+	__this_cpu_inc(lockdep_stats.ptr);			\
+}
+
+#define debug_atomic_dec(ptr)			{		\
+	WARN_ON_ONCE(!irqs_disabled());				\
+	__this_cpu_dec(lockdep_stats.ptr);			\
+}
+
+#define debug_atomic_read(ptr)		({				\
+	struct lockdep_stats *__cpu_lockdep_stats;			\
+	unsigned long long __total = 0;					\
+	int __cpu;							\
+	for_each_possible_cpu(__cpu) {					\
+		__cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu);	\
+		__total += __cpu_lockdep_stats->ptr;			\
+	}								\
+	__total;							\
+})
 #else
+# define __debug_atomic_inc(ptr)	do { } while (0)
 # define debug_atomic_inc(ptr)		do { } while (0)
 # define debug_atomic_dec(ptr)		do { } while (0)
 # define debug_atomic_read(ptr)		0
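The lockdep_internals.h hunk above replaces a set of global atomic counters with one per-CPU structure: the fast path only bumps its local CPU's copy, and only the /proc read side walks every CPU to build a total. A minimal, self-contained sketch of that counting pattern follows; struct my_stats and the helper names are invented for illustration and are not part of this patch:

	#include <linux/percpu.h>

	/* Hypothetical per-CPU statistics, mirroring the shape used above. */
	struct my_stats {
		int	events;
	};
	static DEFINE_PER_CPU(struct my_stats, my_stats);

	/* Fast path: no atomics, no shared cache line; IRQs assumed off. */
	static inline void my_stats_inc(void)
	{
		__this_cpu_inc(my_stats.events);
	}

	/* Slow path (e.g. a /proc file): sum every CPU's copy. */
	static unsigned long long my_stats_read(void)
	{
		unsigned long long total = 0;
		int cpu;

		for_each_possible_cpu(cpu)
			total += per_cpu(my_stats, cpu).events;
		return total;
	}

The total is slightly fuzzy while writers are active, which is acceptable for statistics and is exactly the trade the patch makes to avoid cache-line bouncing on the counter.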
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index d4aba4f3584c..59b76c8ce9d7 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -184,34 +184,34 @@ static const struct file_operations proc_lockdep_chains_operations = {
 static void lockdep_stats_debug_show(struct seq_file *m)
 {
 #ifdef CONFIG_DEBUG_LOCKDEP
-	unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
-		     hi2 = debug_atomic_read(&hardirqs_off_events),
-		     hr1 = debug_atomic_read(&redundant_hardirqs_on),
-		     hr2 = debug_atomic_read(&redundant_hardirqs_off),
-		     si1 = debug_atomic_read(&softirqs_on_events),
-		     si2 = debug_atomic_read(&softirqs_off_events),
-		     sr1 = debug_atomic_read(&redundant_softirqs_on),
-		     sr2 = debug_atomic_read(&redundant_softirqs_off);
+	unsigned long long hi1 = debug_atomic_read(hardirqs_on_events),
+			   hi2 = debug_atomic_read(hardirqs_off_events),
+			   hr1 = debug_atomic_read(redundant_hardirqs_on),
+			   hr2 = debug_atomic_read(redundant_hardirqs_off),
+			   si1 = debug_atomic_read(softirqs_on_events),
+			   si2 = debug_atomic_read(softirqs_off_events),
+			   sr1 = debug_atomic_read(redundant_softirqs_on),
+			   sr2 = debug_atomic_read(redundant_softirqs_off);
 
-	seq_printf(m, " chain lookup misses: %11u\n",
-		debug_atomic_read(&chain_lookup_misses));
-	seq_printf(m, " chain lookup hits: %11u\n",
-		debug_atomic_read(&chain_lookup_hits));
-	seq_printf(m, " cyclic checks: %11u\n",
-		debug_atomic_read(&nr_cyclic_checks));
-	seq_printf(m, " find-mask forwards checks: %11u\n",
-		debug_atomic_read(&nr_find_usage_forwards_checks));
-	seq_printf(m, " find-mask backwards checks: %11u\n",
-		debug_atomic_read(&nr_find_usage_backwards_checks));
+	seq_printf(m, " chain lookup misses: %11llu\n",
+		debug_atomic_read(chain_lookup_misses));
+	seq_printf(m, " chain lookup hits: %11llu\n",
+		debug_atomic_read(chain_lookup_hits));
+	seq_printf(m, " cyclic checks: %11llu\n",
+		debug_atomic_read(nr_cyclic_checks));
+	seq_printf(m, " find-mask forwards checks: %11llu\n",
+		debug_atomic_read(nr_find_usage_forwards_checks));
+	seq_printf(m, " find-mask backwards checks: %11llu\n",
+		debug_atomic_read(nr_find_usage_backwards_checks));
 
-	seq_printf(m, " hardirq on events: %11u\n", hi1);
-	seq_printf(m, " hardirq off events: %11u\n", hi2);
-	seq_printf(m, " redundant hardirq ons: %11u\n", hr1);
-	seq_printf(m, " redundant hardirq offs: %11u\n", hr2);
-	seq_printf(m, " softirq on events: %11u\n", si1);
-	seq_printf(m, " softirq off events: %11u\n", si2);
-	seq_printf(m, " redundant softirq ons: %11u\n", sr1);
-	seq_printf(m, " redundant softirq offs: %11u\n", sr2);
+	seq_printf(m, " hardirq on events: %11llu\n", hi1);
+	seq_printf(m, " hardirq off events: %11llu\n", hi2);
+	seq_printf(m, " redundant hardirq ons: %11llu\n", hr1);
+	seq_printf(m, " redundant hardirq offs: %11llu\n", hr2);
+	seq_printf(m, " softirq on events: %11llu\n", si1);
+	seq_printf(m, " softirq off events: %11llu\n", si2);
+	seq_printf(m, " redundant softirq ons: %11llu\n", sr1);
+	seq_printf(m, " redundant softirq offs: %11llu\n", sr2);
 #endif
 }
 
@@ -263,7 +263,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
 #endif
 	}
 #ifdef CONFIG_DEBUG_LOCKDEP
-	DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
+	DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused);
 #endif
 	seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
 			nr_lock_classes, MAX_LOCKDEP_KEYS);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a4fa381db3c2..e099650cd249 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2297,11 +2297,6 @@ unlock:
 	rcu_read_unlock();
 }
 
-static unsigned long perf_data_size(struct perf_mmap_data *data)
-{
-	return data->nr_pages << (PAGE_SHIFT + data->data_order);
-}
-
 #ifndef CONFIG_PERF_USE_VMALLOC
 
 /*
@@ -2320,6 +2315,19 @@ perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
 	return virt_to_page(data->data_pages[pgoff - 1]);
 }
 
+static void *perf_mmap_alloc_page(int cpu)
+{
+	struct page *page;
+	int node;
+
+	node = (cpu == -1) ? cpu : cpu_to_node(cpu);
+	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+	if (!page)
+		return NULL;
+
+	return page_address(page);
+}
+
 static struct perf_mmap_data *
 perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 {
@@ -2336,17 +2344,16 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 	if (!data)
 		goto fail;
 
-	data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
+	data->user_page = perf_mmap_alloc_page(event->cpu);
 	if (!data->user_page)
 		goto fail_user_page;
 
 	for (i = 0; i < nr_pages; i++) {
-		data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
+		data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
 		if (!data->data_pages[i])
 			goto fail_data_pages;
 	}
 
-	data->data_order = 0;
 	data->nr_pages = nr_pages;
 
 	return data;
@@ -2382,6 +2389,11 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
 	kfree(data);
 }
 
+static inline int page_order(struct perf_mmap_data *data)
+{
+	return 0;
+}
+
 #else
 
 /*
@@ -2390,10 +2402,15 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
  * Required for architectures that have d-cache aliasing issues.
  */
 
+static inline int page_order(struct perf_mmap_data *data)
+{
+	return data->page_order;
+}
+
 static struct page *
 perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
 {
-	if (pgoff > (1UL << data->data_order))
+	if (pgoff > (1UL << page_order(data)))
 		return NULL;
 
 	return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
@@ -2413,7 +2430,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
 	int i, nr;
 
 	data = container_of(work, struct perf_mmap_data, work);
-	nr = 1 << data->data_order;
+	nr = 1 << page_order(data);
 
 	base = data->user_page;
 	for (i = 0; i < nr + 1; i++)
@@ -2452,7 +2469,7 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 
 	data->user_page = all_buf;
 	data->data_pages[0] = all_buf + PAGE_SIZE;
-	data->data_order = ilog2(nr_pages);
+	data->page_order = ilog2(nr_pages);
 	data->nr_pages = 1;
 
 	return data;
@@ -2466,6 +2483,11 @@ fail:
 
 #endif
 
+static unsigned long perf_data_size(struct perf_mmap_data *data)
+{
+	return data->nr_pages << (PAGE_SHIFT + page_order(data));
+}
+
 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct perf_event *event = vma->vm_file->private_data;
@@ -2506,8 +2528,6 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
 {
 	long max_size = perf_data_size(data);
 
-	atomic_set(&data->lock, -1);
-
 	if (event->attr.watermark) {
 		data->watermark = min_t(long, max_size,
 					event->attr.wakeup_watermark);
@@ -2580,6 +2600,14 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	long user_extra, extra;
 	int ret = 0;
 
+	/*
+	 * Don't allow mmap() of inherited per-task counters. This would
+	 * create a performance issue due to all children writing to the
+	 * same buffer.
+	 */
+	if (event->cpu == -1 && event->attr.inherit)
+		return -EINVAL;
+
 	if (!(vma->vm_flags & VM_SHARED))
 		return -EINVAL;
 
@@ -2885,120 +2913,80 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 }
 
 /*
- * Curious locking construct.
- *
  * We need to ensure a later event_id doesn't publish a head when a former
- * event_id isn't done writing. However since we need to deal with NMIs we
+ * event isn't done writing. However since we need to deal with NMIs we
  * cannot fully serialize things.
  *
- * What we do is serialize between CPUs so we only have to deal with NMI
- * nesting on a single CPU.
- *
 * We only publish the head (and generate a wakeup) when the outer-most
- * event_id completes.
+ * event completes.
 */
-static void perf_output_lock(struct perf_output_handle *handle)
+static void perf_output_get_handle(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
-	int cur, cpu = get_cpu();
-
-	handle->locked = 0;
-
-	for (;;) {
-		cur = atomic_cmpxchg(&data->lock, -1, cpu);
-		if (cur == -1) {
-			handle->locked = 1;
-			break;
-		}
-		if (cur == cpu)
-			break;
 
-		cpu_relax();
-	}
+	preempt_disable();
+	local_inc(&data->nest);
+	handle->wakeup = local_read(&data->wakeup);
 }
 
-static void perf_output_unlock(struct perf_output_handle *handle)
+static void perf_output_put_handle(struct perf_output_handle *handle)
 {
 	struct perf_mmap_data *data = handle->data;
 	unsigned long head;
-	int cpu;
-
-	data->done_head = data->head;
-
-	if (!handle->locked)
-		goto out;
 
 again:
-	/*
-	 * The xchg implies a full barrier that ensures all writes are done
-	 * before we publish the new head, matched by a rmb() in userspace when
-	 * reading this position.
-	 */
-	while ((head = atomic_long_xchg(&data->done_head, 0)))
-		data->user_page->data_head = head;
+	head = local_read(&data->head);
 
 	/*
-	 * NMI can happen here, which means we can miss a done_head update.
+	 * IRQ/NMI can happen here, which means we can miss a head update.
 	 */
 
-	cpu = atomic_xchg(&data->lock, -1);
-	WARN_ON_ONCE(cpu != smp_processor_id());
+	if (!local_dec_and_test(&data->nest))
+		goto out;
 
 	/*
-	 * Therefore we have to validate we did not indeed do so.
+	 * Publish the known good head. Rely on the full barrier implied
+	 * by atomic_dec_and_test() order the data->head read and this
+	 * write.
 	 */
-	if (unlikely(atomic_long_read(&data->done_head))) {
-		/*
-		 * Since we had it locked, we can lock it again.
-		 */
-		while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
-			cpu_relax();
+	data->user_page->data_head = head;
 
+	/*
+	 * Now check if we missed an update, rely on the (compiler)
+	 * barrier in atomic_dec_and_test() to re-read data->head.
+	 */
+	if (unlikely(head != local_read(&data->head))) {
+		local_inc(&data->nest);
 		goto again;
 	}
 
-	if (atomic_xchg(&data->wakeup, 0))
+	if (handle->wakeup != local_read(&data->wakeup))
 		perf_output_wakeup(handle);
-out:
-	put_cpu();
+
+out:
+	preempt_enable();
 }
 
-void perf_output_copy(struct perf_output_handle *handle,
+__always_inline void perf_output_copy(struct perf_output_handle *handle,
 		      const void *buf, unsigned int len)
 {
-	unsigned int pages_mask;
-	unsigned long offset;
-	unsigned int size;
-	void **pages;
-
-	offset = handle->offset;
-	pages_mask = handle->data->nr_pages - 1;
-	pages = handle->data->data_pages;
-
 	do {
-		unsigned long page_offset;
-		unsigned long page_size;
-		int nr;
+		unsigned long size = min_t(unsigned long, handle->size, len);
 
-		nr = (offset >> PAGE_SHIFT) & pages_mask;
-		page_size = 1UL << (handle->data->data_order + PAGE_SHIFT);
-		page_offset = offset & (page_size - 1);
-		size = min_t(unsigned int, page_size - page_offset, len);
+		memcpy(handle->addr, buf, size);
 
-		memcpy(pages[nr] + page_offset, buf, size);
+		len -= size;
+		handle->addr += size;
+		handle->size -= size;
+		if (!handle->size) {
+			struct perf_mmap_data *data = handle->data;
 
-		len -= size;
-		buf += size;
-		offset += size;
+			handle->page++;
+			handle->page &= data->nr_pages - 1;
+			handle->addr = data->data_pages[handle->page];
+			handle->size = PAGE_SIZE << page_order(data);
+		}
 	} while (len);
-
-	handle->offset = offset;
-
-	/*
-	 * Check we didn't copy past our reservation window, taking the
-	 * possible unsigned int wrap into account.
-	 */
-	WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
 }
 
 int perf_output_begin(struct perf_output_handle *handle,
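The hunk above replaces the old per-buffer "lock" (a cross-CPU cmpxchg spin) with a nesting counter kept in the buffer itself: every writer, including a nested IRQ/NMI writer, bumps data->nest; only the outermost writer publishes the head it observed and then re-checks whether a nested writer advanced the head in the meantime. A condensed, illustrative sketch of that publication loop follows, using the same fields the patch introduces; it omits the wakeup and preemption handling and is not a drop-in replacement for perf_output_put_handle():

	/* Sketch of the outermost-writer publication protocol used above. */
	static void publish_head(struct perf_mmap_data *data)
	{
		unsigned long head;

	again:
		head = local_read(&data->head);

		/* Not the outermost writer: whoever we nested in publishes. */
		if (!local_dec_and_test(&data->nest))
			return;

		/* Outermost writer: expose the last head we saw to userspace. */
		data->user_page->data_head = head;

		/* A nested writer may have advanced the head meanwhile; redo. */
		if (head != local_read(&data->head)) {
			local_inc(&data->nest);
			goto again;
		}
	}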
@@ -3036,13 +3024,13 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3036 | handle->sample = sample; | 3024 | handle->sample = sample; |
3037 | 3025 | ||
3038 | if (!data->nr_pages) | 3026 | if (!data->nr_pages) |
3039 | goto fail; | 3027 | goto out; |
3040 | 3028 | ||
3041 | have_lost = atomic_read(&data->lost); | 3029 | have_lost = local_read(&data->lost); |
3042 | if (have_lost) | 3030 | if (have_lost) |
3043 | size += sizeof(lost_event); | 3031 | size += sizeof(lost_event); |
3044 | 3032 | ||
3045 | perf_output_lock(handle); | 3033 | perf_output_get_handle(handle); |
3046 | 3034 | ||
3047 | do { | 3035 | do { |
3048 | /* | 3036 | /* |
@@ -3052,24 +3040,28 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3052 | */ | 3040 | */ |
3053 | tail = ACCESS_ONCE(data->user_page->data_tail); | 3041 | tail = ACCESS_ONCE(data->user_page->data_tail); |
3054 | smp_rmb(); | 3042 | smp_rmb(); |
3055 | offset = head = atomic_long_read(&data->head); | 3043 | offset = head = local_read(&data->head); |
3056 | head += size; | 3044 | head += size; |
3057 | if (unlikely(!perf_output_space(data, tail, offset, head))) | 3045 | if (unlikely(!perf_output_space(data, tail, offset, head))) |
3058 | goto fail; | 3046 | goto fail; |
3059 | } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); | 3047 | } while (local_cmpxchg(&data->head, offset, head) != offset); |
3060 | 3048 | ||
3061 | handle->offset = offset; | 3049 | if (head - local_read(&data->wakeup) > data->watermark) |
3062 | handle->head = head; | 3050 | local_add(data->watermark, &data->wakeup); |
3063 | 3051 | ||
3064 | if (head - tail > data->watermark) | 3052 | handle->page = offset >> (PAGE_SHIFT + page_order(data)); |
3065 | atomic_set(&data->wakeup, 1); | 3053 | handle->page &= data->nr_pages - 1; |
3054 | handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1); | ||
3055 | handle->addr = data->data_pages[handle->page]; | ||
3056 | handle->addr += handle->size; | ||
3057 | handle->size = (PAGE_SIZE << page_order(data)) - handle->size; | ||
3066 | 3058 | ||
3067 | if (have_lost) { | 3059 | if (have_lost) { |
3068 | lost_event.header.type = PERF_RECORD_LOST; | 3060 | lost_event.header.type = PERF_RECORD_LOST; |
3069 | lost_event.header.misc = 0; | 3061 | lost_event.header.misc = 0; |
3070 | lost_event.header.size = sizeof(lost_event); | 3062 | lost_event.header.size = sizeof(lost_event); |
3071 | lost_event.id = event->id; | 3063 | lost_event.id = event->id; |
3072 | lost_event.lost = atomic_xchg(&data->lost, 0); | 3064 | lost_event.lost = local_xchg(&data->lost, 0); |
3073 | 3065 | ||
3074 | perf_output_put(handle, lost_event); | 3066 | perf_output_put(handle, lost_event); |
3075 | } | 3067 | } |
@@ -3077,8 +3069,8 @@ int perf_output_begin(struct perf_output_handle *handle, | |||
3077 | return 0; | 3069 | return 0; |
3078 | 3070 | ||
3079 | fail: | 3071 | fail: |
3080 | atomic_inc(&data->lost); | 3072 | local_inc(&data->lost); |
3081 | perf_output_unlock(handle); | 3073 | perf_output_put_handle(handle); |
3082 | out: | 3074 | out: |
3083 | rcu_read_unlock(); | 3075 | rcu_read_unlock(); |
3084 | 3076 | ||
@@ -3093,14 +3085,14 @@ void perf_output_end(struct perf_output_handle *handle) | |||
3093 | int wakeup_events = event->attr.wakeup_events; | 3085 | int wakeup_events = event->attr.wakeup_events; |
3094 | 3086 | ||
3095 | if (handle->sample && wakeup_events) { | 3087 | if (handle->sample && wakeup_events) { |
3096 | int events = atomic_inc_return(&data->events); | 3088 | int events = local_inc_return(&data->events); |
3097 | if (events >= wakeup_events) { | 3089 | if (events >= wakeup_events) { |
3098 | atomic_sub(wakeup_events, &data->events); | 3090 | local_sub(wakeup_events, &data->events); |
3099 | atomic_set(&data->wakeup, 1); | 3091 | local_inc(&data->wakeup); |
3100 | } | 3092 | } |
3101 | } | 3093 | } |
3102 | 3094 | ||
3103 | perf_output_unlock(handle); | 3095 | perf_output_put_handle(handle); |
3104 | rcu_read_unlock(); | 3096 | rcu_read_unlock(); |
3105 | } | 3097 | } |
3106 | 3098 | ||
@@ -3436,22 +3428,13 @@ static void perf_event_task_output(struct perf_event *event, | |||
3436 | { | 3428 | { |
3437 | struct perf_output_handle handle; | 3429 | struct perf_output_handle handle; |
3438 | struct task_struct *task = task_event->task; | 3430 | struct task_struct *task = task_event->task; |
3439 | unsigned long flags; | ||
3440 | int size, ret; | 3431 | int size, ret; |
3441 | 3432 | ||
3442 | /* | ||
3443 | * If this CPU attempts to acquire an rq lock held by a CPU spinning | ||
3444 | * in perf_output_lock() from interrupt context, it's game over. | ||
3445 | */ | ||
3446 | local_irq_save(flags); | ||
3447 | |||
3448 | size = task_event->event_id.header.size; | 3433 | size = task_event->event_id.header.size; |
3449 | ret = perf_output_begin(&handle, event, size, 0, 0); | 3434 | ret = perf_output_begin(&handle, event, size, 0, 0); |
3450 | 3435 | ||
3451 | if (ret) { | 3436 | if (ret) |
3452 | local_irq_restore(flags); | ||
3453 | return; | 3437 | return; |
3454 | } | ||
3455 | 3438 | ||
3456 | task_event->event_id.pid = perf_event_pid(event, task); | 3439 | task_event->event_id.pid = perf_event_pid(event, task); |
3457 | task_event->event_id.ppid = perf_event_pid(event, current); | 3440 | task_event->event_id.ppid = perf_event_pid(event, current); |
@@ -3462,7 +3445,6 @@ static void perf_event_task_output(struct perf_event *event, | |||
3462 | perf_output_put(&handle, task_event->event_id); | 3445 | perf_output_put(&handle, task_event->event_id); |
3463 | 3446 | ||
3464 | perf_output_end(&handle); | 3447 | perf_output_end(&handle); |
3465 | local_irq_restore(flags); | ||
3466 | } | 3448 | } |
3467 | 3449 | ||
3468 | static int perf_event_task_match(struct perf_event *event) | 3450 | static int perf_event_task_match(struct perf_event *event) |
@@ -4020,9 +4002,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, | |||
4020 | perf_swevent_overflow(event, 0, nmi, data, regs); | 4002 | perf_swevent_overflow(event, 0, nmi, data, regs); |
4021 | } | 4003 | } |
4022 | 4004 | ||
4023 | static int perf_tp_event_match(struct perf_event *event, | ||
4024 | struct perf_sample_data *data); | ||
4025 | |||
4026 | static int perf_exclude_event(struct perf_event *event, | 4005 | static int perf_exclude_event(struct perf_event *event, |
4027 | struct pt_regs *regs) | 4006 | struct pt_regs *regs) |
4028 | { | 4007 | { |
@@ -4052,10 +4031,6 @@ static int perf_swevent_match(struct perf_event *event, | |||
4052 | if (perf_exclude_event(event, regs)) | 4031 | if (perf_exclude_event(event, regs)) |
4053 | return 0; | 4032 | return 0; |
4054 | 4033 | ||
4055 | if (event->attr.type == PERF_TYPE_TRACEPOINT && | ||
4056 | !perf_tp_event_match(event, data)) | ||
4057 | return 0; | ||
4058 | |||
4059 | return 1; | 4034 | return 1; |
4060 | } | 4035 | } |
4061 | 4036 | ||
@@ -4066,19 +4041,46 @@ static inline u64 swevent_hash(u64 type, u32 event_id) | |||
4066 | return hash_64(val, SWEVENT_HLIST_BITS); | 4041 | return hash_64(val, SWEVENT_HLIST_BITS); |
4067 | } | 4042 | } |
4068 | 4043 | ||
4069 | static struct hlist_head * | 4044 | static inline struct hlist_head * |
4070 | find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id) | 4045 | __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id) |
4071 | { | 4046 | { |
4072 | u64 hash; | 4047 | u64 hash = swevent_hash(type, event_id); |
4073 | struct swevent_hlist *hlist; | 4048 | |
4049 | return &hlist->heads[hash]; | ||
4050 | } | ||
4074 | 4051 | ||
4075 | hash = swevent_hash(type, event_id); | 4052 | /* For the read side: events when they trigger */ |
4053 | static inline struct hlist_head * | ||
4054 | find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) | ||
4055 | { | ||
4056 | struct swevent_hlist *hlist; | ||
4076 | 4057 | ||
4077 | hlist = rcu_dereference(ctx->swevent_hlist); | 4058 | hlist = rcu_dereference(ctx->swevent_hlist); |
4078 | if (!hlist) | 4059 | if (!hlist) |
4079 | return NULL; | 4060 | return NULL; |
4080 | 4061 | ||
4081 | return &hlist->heads[hash]; | 4062 | return __find_swevent_head(hlist, type, event_id); |
4063 | } | ||
4064 | |||
4065 | /* For the event head insertion and removal in the hlist */ | ||
4066 | static inline struct hlist_head * | ||
4067 | find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) | ||
4068 | { | ||
4069 | struct swevent_hlist *hlist; | ||
4070 | u32 event_id = event->attr.config; | ||
4071 | u64 type = event->attr.type; | ||
4072 | |||
4073 | /* | ||
4074 | * Event scheduling is always serialized against hlist allocation | ||
4075 | * and release. Which makes the protected version suitable here. | ||
4076 | * The context lock guarantees that. | ||
4077 | */ | ||
4078 | hlist = rcu_dereference_protected(ctx->swevent_hlist, | ||
4079 | lockdep_is_held(&event->ctx->lock)); | ||
4080 | if (!hlist) | ||
4081 | return NULL; | ||
4082 | |||
4083 | return __find_swevent_head(hlist, type, event_id); | ||
4082 | } | 4084 | } |
4083 | 4085 | ||
4084 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | 4086 | static void do_perf_sw_event(enum perf_type_id type, u32 event_id, |
@@ -4095,7 +4097,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, | |||
4095 | 4097 | ||
4096 | rcu_read_lock(); | 4098 | rcu_read_lock(); |
4097 | 4099 | ||
4098 | head = find_swevent_head(cpuctx, type, event_id); | 4100 | head = find_swevent_head_rcu(cpuctx, type, event_id); |
4099 | 4101 | ||
4100 | if (!head) | 4102 | if (!head) |
4101 | goto end; | 4103 | goto end; |
@@ -4110,7 +4112,7 @@ end: | |||
4110 | 4112 | ||
4111 | int perf_swevent_get_recursion_context(void) | 4113 | int perf_swevent_get_recursion_context(void) |
4112 | { | 4114 | { |
4113 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | 4115 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
4114 | int rctx; | 4116 | int rctx; |
4115 | 4117 | ||
4116 | if (in_nmi()) | 4118 | if (in_nmi()) |
@@ -4122,10 +4124,8 @@ int perf_swevent_get_recursion_context(void) | |||
4122 | else | 4124 | else |
4123 | rctx = 0; | 4125 | rctx = 0; |
4124 | 4126 | ||
4125 | if (cpuctx->recursion[rctx]) { | 4127 | if (cpuctx->recursion[rctx]) |
4126 | put_cpu_var(perf_cpu_context); | ||
4127 | return -1; | 4128 | return -1; |
4128 | } | ||
4129 | 4129 | ||
4130 | cpuctx->recursion[rctx]++; | 4130 | cpuctx->recursion[rctx]++; |
4131 | barrier(); | 4131 | barrier(); |
@@ -4139,7 +4139,6 @@ void perf_swevent_put_recursion_context(int rctx) | |||
4139 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | 4139 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); |
4140 | barrier(); | 4140 | barrier(); |
4141 | cpuctx->recursion[rctx]--; | 4141 | cpuctx->recursion[rctx]--; |
4142 | put_cpu_var(perf_cpu_context); | ||
4143 | } | 4142 | } |
4144 | EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); | 4143 | EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); |
4145 | 4144 | ||
@@ -4150,6 +4149,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi, | |||
4150 | struct perf_sample_data data; | 4149 | struct perf_sample_data data; |
4151 | int rctx; | 4150 | int rctx; |
4152 | 4151 | ||
4152 | preempt_disable_notrace(); | ||
4153 | rctx = perf_swevent_get_recursion_context(); | 4153 | rctx = perf_swevent_get_recursion_context(); |
4154 | if (rctx < 0) | 4154 | if (rctx < 0) |
4155 | return; | 4155 | return; |
@@ -4159,6 +4159,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi, | |||
4159 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); | 4159 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); |
4160 | 4160 | ||
4161 | perf_swevent_put_recursion_context(rctx); | 4161 | perf_swevent_put_recursion_context(rctx); |
4162 | preempt_enable_notrace(); | ||
4162 | } | 4163 | } |
4163 | 4164 | ||
4164 | static void perf_swevent_read(struct perf_event *event) | 4165 | static void perf_swevent_read(struct perf_event *event) |
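[Sketch] With the two hunks above, the recursion-context helpers switch from get_cpu_var()/put_cpu_var() to __get_cpu_var(), so they no longer disable and re-enable preemption themselves; __perf_sw_event() instead brackets the whole sequence with preempt_disable_notrace()/preempt_enable_notrace(). A minimal sketch of the calling convention callers are now expected to follow, keeping the enable on every exit path:

    int rctx;

    preempt_disable_notrace();              /* pin the CPU for __get_cpu_var() */
    rctx = perf_swevent_get_recursion_context();
    if (rctx >= 0) {
            /* ... build sample data and call do_perf_sw_event() ... */
            perf_swevent_put_recursion_context(rctx);
    }
    preempt_enable_notrace();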
@@ -4178,7 +4179,7 @@ static int perf_swevent_enable(struct perf_event *event) | |||
4178 | perf_swevent_set_period(event); | 4179 | perf_swevent_set_period(event); |
4179 | } | 4180 | } |
4180 | 4181 | ||
4181 | head = find_swevent_head(cpuctx, event->attr.type, event->attr.config); | 4182 | head = find_swevent_head(cpuctx, event); |
4182 | if (WARN_ON_ONCE(!head)) | 4183 | if (WARN_ON_ONCE(!head)) |
4183 | return -EINVAL; | 4184 | return -EINVAL; |
4184 | 4185 | ||
@@ -4366,6 +4367,14 @@ static const struct pmu perf_ops_task_clock = { | |||
4366 | .read = task_clock_perf_event_read, | 4367 | .read = task_clock_perf_event_read, |
4367 | }; | 4368 | }; |
4368 | 4369 | ||
4370 | /* Deref the hlist from the update side */ | ||
4371 | static inline struct swevent_hlist * | ||
4372 | swevent_hlist_deref(struct perf_cpu_context *cpuctx) | ||
4373 | { | ||
4374 | return rcu_dereference_protected(cpuctx->swevent_hlist, | ||
4375 | lockdep_is_held(&cpuctx->hlist_mutex)); | ||
4376 | } | ||
4377 | |||
4369 | static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) | 4378 | static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) |
4370 | { | 4379 | { |
4371 | struct swevent_hlist *hlist; | 4380 | struct swevent_hlist *hlist; |
@@ -4376,12 +4385,11 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) | |||
4376 | 4385 | ||
4377 | static void swevent_hlist_release(struct perf_cpu_context *cpuctx) | 4386 | static void swevent_hlist_release(struct perf_cpu_context *cpuctx) |
4378 | { | 4387 | { |
4379 | struct swevent_hlist *hlist; | 4388 | struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx); |
4380 | 4389 | ||
4381 | if (!cpuctx->swevent_hlist) | 4390 | if (!hlist) |
4382 | return; | 4391 | return; |
4383 | 4392 | ||
4384 | hlist = cpuctx->swevent_hlist; | ||
4385 | rcu_assign_pointer(cpuctx->swevent_hlist, NULL); | 4393 | rcu_assign_pointer(cpuctx->swevent_hlist, NULL); |
4386 | call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); | 4394 | call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); |
4387 | } | 4395 | } |
@@ -4418,7 +4426,7 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) | |||
4418 | 4426 | ||
4419 | mutex_lock(&cpuctx->hlist_mutex); | 4427 | mutex_lock(&cpuctx->hlist_mutex); |
4420 | 4428 | ||
4421 | if (!cpuctx->swevent_hlist && cpu_online(cpu)) { | 4429 | if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) { |
4422 | struct swevent_hlist *hlist; | 4430 | struct swevent_hlist *hlist; |
4423 | 4431 | ||
4424 | hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); | 4432 | hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); |
@@ -4467,10 +4475,46 @@ static int swevent_hlist_get(struct perf_event *event) | |||
4467 | 4475 | ||
4468 | #ifdef CONFIG_EVENT_TRACING | 4476 | #ifdef CONFIG_EVENT_TRACING |
4469 | 4477 | ||
4470 | void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | 4478 | static const struct pmu perf_ops_tracepoint = { |
4471 | int entry_size, struct pt_regs *regs) | 4479 | .enable = perf_trace_enable, |
4480 | .disable = perf_trace_disable, | ||
4481 | .read = perf_swevent_read, | ||
4482 | .unthrottle = perf_swevent_unthrottle, | ||
4483 | }; | ||
4484 | |||
4485 | static int perf_tp_filter_match(struct perf_event *event, | ||
4486 | struct perf_sample_data *data) | ||
4487 | { | ||
4488 | void *record = data->raw->data; | ||
4489 | |||
4490 | if (likely(!event->filter) || filter_match_preds(event->filter, record)) | ||
4491 | return 1; | ||
4492 | return 0; | ||
4493 | } | ||
4494 | |||
4495 | static int perf_tp_event_match(struct perf_event *event, | ||
4496 | struct perf_sample_data *data, | ||
4497 | struct pt_regs *regs) | ||
4498 | { | ||
4499 | /* | ||
4500 | * All tracepoints are from kernel-space. | ||
4501 | */ | ||
4502 | if (event->attr.exclude_kernel) | ||
4503 | return 0; | ||
4504 | |||
4505 | if (!perf_tp_filter_match(event, data)) | ||
4506 | return 0; | ||
4507 | |||
4508 | return 1; | ||
4509 | } | ||
4510 | |||
4511 | void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, | ||
4512 | struct pt_regs *regs, struct hlist_head *head) | ||
4472 | { | 4513 | { |
4473 | struct perf_sample_data data; | 4514 | struct perf_sample_data data; |
4515 | struct perf_event *event; | ||
4516 | struct hlist_node *node; | ||
4517 | |||
4474 | struct perf_raw_record raw = { | 4518 | struct perf_raw_record raw = { |
4475 | .size = entry_size, | 4519 | .size = entry_size, |
4476 | .data = record, | 4520 | .data = record, |
@@ -4479,26 +4523,18 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, | |||
4479 | perf_sample_data_init(&data, addr); | 4523 | perf_sample_data_init(&data, addr); |
4480 | data.raw = &raw; | 4524 | data.raw = &raw; |
4481 | 4525 | ||
4482 | /* Trace events already protected against recursion */ | 4526 | rcu_read_lock(); |
4483 | do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, | 4527 | hlist_for_each_entry_rcu(event, node, head, hlist_entry) { |
4484 | &data, regs); | 4528 | if (perf_tp_event_match(event, &data, regs)) |
4529 | perf_swevent_add(event, count, 1, &data, regs); | ||
4530 | } | ||
4531 | rcu_read_unlock(); | ||
4485 | } | 4532 | } |
4486 | EXPORT_SYMBOL_GPL(perf_tp_event); | 4533 | EXPORT_SYMBOL_GPL(perf_tp_event); |
4487 | 4534 | ||
4488 | static int perf_tp_event_match(struct perf_event *event, | ||
4489 | struct perf_sample_data *data) | ||
4490 | { | ||
4491 | void *record = data->raw->data; | ||
4492 | |||
4493 | if (likely(!event->filter) || filter_match_preds(event->filter, record)) | ||
4494 | return 1; | ||
4495 | return 0; | ||
4496 | } | ||
4497 | |||
4498 | static void tp_perf_event_destroy(struct perf_event *event) | 4535 | static void tp_perf_event_destroy(struct perf_event *event) |
4499 | { | 4536 | { |
4500 | perf_trace_disable(event->attr.config); | 4537 | perf_trace_destroy(event); |
4501 | swevent_hlist_put(event); | ||
4502 | } | 4538 | } |
4503 | 4539 | ||
4504 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4540 | static const struct pmu *tp_perf_event_init(struct perf_event *event) |
@@ -4514,17 +4550,13 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) | |||
4514 | !capable(CAP_SYS_ADMIN)) | 4550 | !capable(CAP_SYS_ADMIN)) |
4515 | return ERR_PTR(-EPERM); | 4551 | return ERR_PTR(-EPERM); |
4516 | 4552 | ||
4517 | if (perf_trace_enable(event->attr.config)) | 4553 | err = perf_trace_init(event); |
4554 | if (err) | ||
4518 | return NULL; | 4555 | return NULL; |
4519 | 4556 | ||
4520 | event->destroy = tp_perf_event_destroy; | 4557 | event->destroy = tp_perf_event_destroy; |
4521 | err = swevent_hlist_get(event); | ||
4522 | if (err) { | ||
4523 | perf_trace_disable(event->attr.config); | ||
4524 | return ERR_PTR(err); | ||
4525 | } | ||
4526 | 4558 | ||
4527 | return &perf_ops_generic; | 4559 | return &perf_ops_tracepoint; |
4528 | } | 4560 | } |
4529 | 4561 | ||
4530 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) | 4562 | static int perf_event_set_filter(struct perf_event *event, void __user *arg) |
@@ -4552,12 +4584,6 @@ static void perf_event_free_filter(struct perf_event *event) | |||
4552 | 4584 | ||
4553 | #else | 4585 | #else |
4554 | 4586 | ||
4555 | static int perf_tp_event_match(struct perf_event *event, | ||
4556 | struct perf_sample_data *data) | ||
4557 | { | ||
4558 | return 1; | ||
4559 | } | ||
4560 | |||
4561 | static const struct pmu *tp_perf_event_init(struct perf_event *event) | 4587 | static const struct pmu *tp_perf_event_init(struct perf_event *event) |
4562 | { | 4588 | { |
4563 | return NULL; | 4589 | return NULL; |
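[Sketch] Tracepoint delivery no longer funnels through the generic software-event hash lookup: the caller now passes the hlist_head of interested events directly (the trace_event_perf.c hunk below allocates one such per-CPU list per trace event), and perf_tp_event() walks it under rcu_read_lock(), applying the kernel-exclusion and filter checks per event. Condensed from the hunk above:

    struct perf_event *event;
    struct hlist_node *node;

    rcu_read_lock();
    hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
            if (perf_tp_event_match(event, &data, regs))
                    perf_swevent_add(event, count, 1, &data, regs);
    }
    rcu_read_unlock();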
@@ -4894,6 +4920,13 @@ static int perf_event_set_output(struct perf_event *event, int output_fd) | |||
4894 | int fput_needed = 0; | 4920 | int fput_needed = 0; |
4895 | int ret = -EINVAL; | 4921 | int ret = -EINVAL; |
4896 | 4922 | ||
4923 | /* | ||
4924 | * Don't allow output of inherited per-task events. This would | ||
4925 | * create performance issues due to cross cpu access. | ||
4926 | */ | ||
4927 | if (event->cpu == -1 && event->attr.inherit) | ||
4928 | return -EINVAL; | ||
4929 | |||
4897 | if (!output_fd) | 4930 | if (!output_fd) |
4898 | goto set; | 4931 | goto set; |
4899 | 4932 | ||
@@ -4914,6 +4947,18 @@ static int perf_event_set_output(struct perf_event *event, int output_fd) | |||
4914 | if (event->data) | 4947 | if (event->data) |
4915 | goto out; | 4948 | goto out; |
4916 | 4949 | ||
4950 | /* | ||
4951 | * Don't allow cross-cpu buffers | ||
4952 | */ | ||
4953 | if (output_event->cpu != event->cpu) | ||
4954 | goto out; | ||
4955 | |||
4956 | /* | ||
4957 | * If it's not a per-cpu buffer, it must be the same task. | ||
4958 | */ | ||
4959 | if (output_event->cpu == -1 && output_event->ctx != event->ctx) | ||
4960 | goto out; | ||
4961 | |||
4917 | atomic_long_inc(&output_file->f_count); | 4962 | atomic_long_inc(&output_file->f_count); |
4918 | 4963 | ||
4919 | set: | 4964 | set: |
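[Sketch] The two hunks above tighten PERF_EVENT_IOC_SET_OUTPUT so that one event cannot redirect into a buffer whose owner lives on a different CPU or in a different task context. Condensed view of the new checks (error handling simplified to direct returns):

    /* inherited per-task events may not redirect their output */
    if (event->cpu == -1 && event->attr.inherit)
            return -EINVAL;
    /* both events must be bound to the same CPU ... */
    if (output_event->cpu != event->cpu)
            return -EINVAL;
    /* ... and a per-task (cpu == -1) buffer must belong to the same context */
    if (output_event->cpu == -1 && output_event->ctx != event->ctx)
            return -EINVAL;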
diff --git a/kernel/profile.c b/kernel/profile.c index a55d3a367ae8..dfadc5b729f1 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -127,8 +127,10 @@ int __ref profile_init(void) | |||
127 | return 0; | 127 | return 0; |
128 | 128 | ||
129 | prof_buffer = vmalloc(buffer_bytes); | 129 | prof_buffer = vmalloc(buffer_bytes); |
130 | if (prof_buffer) | 130 | if (prof_buffer) { |
131 | memset(prof_buffer, 0, buffer_bytes); | ||
131 | return 0; | 132 | return 0; |
133 | } | ||
132 | 134 | ||
133 | free_cpumask_var(prof_cpu_mask); | 135 | free_cpumask_var(prof_cpu_mask); |
134 | return -ENOMEM; | 136 | return -ENOMEM; |
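[Sketch] The profile.c fix above matters because vmalloc() returns uninitialized memory, so the profile buffer has to be cleared explicitly before it is used. Minimal sketch of the corrected path (a combined allocate-and-zero helper is not assumed here):

    prof_buffer = vmalloc(buffer_bytes);
    if (prof_buffer) {
            memset(prof_buffer, 0, buffer_bytes);   /* vmalloc() does not zero */
            return 0;
    }

    free_cpumask_var(prof_cpu_mask);
    return -ENOMEM;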
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 9fb51237b18c..6af9cdd558b7 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
15 | #include <linux/highmem.h> | 15 | #include <linux/highmem.h> |
16 | #include <linux/pagemap.h> | 16 | #include <linux/pagemap.h> |
17 | #include <linux/smp_lock.h> | ||
18 | #include <linux/ptrace.h> | 17 | #include <linux/ptrace.h> |
19 | #include <linux/security.h> | 18 | #include <linux/security.h> |
20 | #include <linux/signal.h> | 19 | #include <linux/signal.h> |
@@ -665,10 +664,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) | |||
665 | struct task_struct *child; | 664 | struct task_struct *child; |
666 | long ret; | 665 | long ret; |
667 | 666 | ||
668 | /* | ||
669 | * This lock_kernel fixes a subtle race with suid exec | ||
670 | */ | ||
671 | lock_kernel(); | ||
672 | if (request == PTRACE_TRACEME) { | 667 | if (request == PTRACE_TRACEME) { |
673 | ret = ptrace_traceme(); | 668 | ret = ptrace_traceme(); |
674 | if (!ret) | 669 | if (!ret) |
@@ -702,7 +697,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) | |||
702 | out_put_task_struct: | 697 | out_put_task_struct: |
703 | put_task_struct(child); | 698 | put_task_struct(child); |
704 | out: | 699 | out: |
705 | unlock_kernel(); | ||
706 | return ret; | 700 | return ret; |
707 | } | 701 | } |
708 | 702 | ||
@@ -812,10 +806,6 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | |||
812 | struct task_struct *child; | 806 | struct task_struct *child; |
813 | long ret; | 807 | long ret; |
814 | 808 | ||
815 | /* | ||
816 | * This lock_kernel fixes a subtle race with suid exec | ||
817 | */ | ||
818 | lock_kernel(); | ||
819 | if (request == PTRACE_TRACEME) { | 809 | if (request == PTRACE_TRACEME) { |
820 | ret = ptrace_traceme(); | 810 | ret = ptrace_traceme(); |
821 | goto out; | 811 | goto out; |
@@ -845,7 +835,6 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, | |||
845 | out_put_task_struct: | 835 | out_put_task_struct: |
846 | put_task_struct(child); | 836 | put_task_struct(child); |
847 | out: | 837 | out: |
848 | unlock_kernel(); | ||
849 | return ret; | 838 | return ret; |
850 | } | 839 | } |
851 | #endif /* CONFIG_COMPAT */ | 840 | #endif /* CONFIG_COMPAT */ |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 03a7ea1579f6..72a8dc9567f5 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include <linux/cpu.h> | 44 | #include <linux/cpu.h> |
45 | #include <linux/mutex.h> | 45 | #include <linux/mutex.h> |
46 | #include <linux/module.h> | 46 | #include <linux/module.h> |
47 | #include <linux/kernel_stat.h> | ||
48 | #include <linux/hardirq.h> | 47 | #include <linux/hardirq.h> |
49 | 48 | ||
50 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 49 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
@@ -64,9 +63,6 @@ struct lockdep_map rcu_sched_lock_map = | |||
64 | EXPORT_SYMBOL_GPL(rcu_sched_lock_map); | 63 | EXPORT_SYMBOL_GPL(rcu_sched_lock_map); |
65 | #endif | 64 | #endif |
66 | 65 | ||
67 | int rcu_scheduler_active __read_mostly; | ||
68 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | ||
69 | |||
70 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 66 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
71 | 67 | ||
72 | int debug_lockdep_rcu_enabled(void) | 68 | int debug_lockdep_rcu_enabled(void) |
@@ -97,21 +93,6 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |||
97 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 93 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
98 | 94 | ||
99 | /* | 95 | /* |
100 | * This function is invoked towards the end of the scheduler's initialization | ||
101 | * process. Before this is called, the idle task might contain | ||
102 | * RCU read-side critical sections (during which time, this idle | ||
103 | * task is booting the system). After this function is called, the | ||
104 | * idle tasks are prohibited from containing RCU read-side critical | ||
105 | * sections. | ||
106 | */ | ||
107 | void rcu_scheduler_starting(void) | ||
108 | { | ||
109 | WARN_ON(num_online_cpus() != 1); | ||
110 | WARN_ON(nr_context_switches() > 0); | ||
111 | rcu_scheduler_active = 1; | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * Awaken the corresponding synchronize_rcu() instance now that a | 96 | * Awaken the corresponding synchronize_rcu() instance now that a |
116 | * grace period has elapsed. | 97 | * grace period has elapsed. |
117 | */ | 98 | */ |
@@ -122,3 +103,14 @@ void wakeme_after_rcu(struct rcu_head *head) | |||
122 | rcu = container_of(head, struct rcu_synchronize, head); | 103 | rcu = container_of(head, struct rcu_synchronize, head); |
123 | complete(&rcu->completion); | 104 | complete(&rcu->completion); |
124 | } | 105 | } |
106 | |||
107 | #ifdef CONFIG_PROVE_RCU | ||
108 | /* | ||
109 | * wrapper function to avoid #include problems. | ||
110 | */ | ||
111 | int rcu_my_thread_group_empty(void) | ||
112 | { | ||
113 | return thread_group_empty(current); | ||
114 | } | ||
115 | EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty); | ||
116 | #endif /* #ifdef CONFIG_PROVE_RCU */ | ||
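[Sketch] Per its comment, rcu_my_thread_group_empty() is only a wrapper added to dodge #include problems: it lets a PROVE_RCU condition cite "the current task is single-threaded" as a reason an unlocked dereference is safe. Purely illustrative use of such a check; the pointer and lock names below are made up, not taken from the patch:

    /* hypothetical caller: 'example_rcu_ptr' and 'example_lock' are not real */
    p = rcu_dereference_check(task->example_rcu_ptr,
                              lockdep_is_held(&example_lock) ||
                              rcu_my_thread_group_empty());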
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 9f6d9ff2572c..38729d3cd236 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c | |||
@@ -44,9 +44,9 @@ struct rcu_ctrlblk { | |||
44 | }; | 44 | }; |
45 | 45 | ||
46 | /* Definition for rcupdate control block. */ | 46 | /* Definition for rcupdate control block. */ |
47 | static struct rcu_ctrlblk rcu_ctrlblk = { | 47 | static struct rcu_ctrlblk rcu_sched_ctrlblk = { |
48 | .donetail = &rcu_ctrlblk.rcucblist, | 48 | .donetail = &rcu_sched_ctrlblk.rcucblist, |
49 | .curtail = &rcu_ctrlblk.rcucblist, | 49 | .curtail = &rcu_sched_ctrlblk.rcucblist, |
50 | }; | 50 | }; |
51 | 51 | ||
52 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { | 52 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { |
@@ -54,6 +54,11 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = { | |||
54 | .curtail = &rcu_bh_ctrlblk.rcucblist, | 54 | .curtail = &rcu_bh_ctrlblk.rcucblist, |
55 | }; | 55 | }; |
56 | 56 | ||
57 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
58 | int rcu_scheduler_active __read_mostly; | ||
59 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | ||
60 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
61 | |||
57 | #ifdef CONFIG_NO_HZ | 62 | #ifdef CONFIG_NO_HZ |
58 | 63 | ||
59 | static long rcu_dynticks_nesting = 1; | 64 | static long rcu_dynticks_nesting = 1; |
@@ -108,7 +113,8 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) | |||
108 | */ | 113 | */ |
109 | void rcu_sched_qs(int cpu) | 114 | void rcu_sched_qs(int cpu) |
110 | { | 115 | { |
111 | if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) | 116 | if (rcu_qsctr_help(&rcu_sched_ctrlblk) + |
117 | rcu_qsctr_help(&rcu_bh_ctrlblk)) | ||
112 | raise_softirq(RCU_SOFTIRQ); | 118 | raise_softirq(RCU_SOFTIRQ); |
113 | } | 119 | } |
114 | 120 | ||
@@ -173,7 +179,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
173 | */ | 179 | */ |
174 | static void rcu_process_callbacks(struct softirq_action *unused) | 180 | static void rcu_process_callbacks(struct softirq_action *unused) |
175 | { | 181 | { |
176 | __rcu_process_callbacks(&rcu_ctrlblk); | 182 | __rcu_process_callbacks(&rcu_sched_ctrlblk); |
177 | __rcu_process_callbacks(&rcu_bh_ctrlblk); | 183 | __rcu_process_callbacks(&rcu_bh_ctrlblk); |
178 | } | 184 | } |
179 | 185 | ||
@@ -187,7 +193,8 @@ static void rcu_process_callbacks(struct softirq_action *unused) | |||
187 | * | 193 | * |
188 | * Cool, huh? (Due to Josh Triplett.) | 194 | * Cool, huh? (Due to Josh Triplett.) |
189 | * | 195 | * |
190 | * But we want to make this a static inline later. | 196 | * But we want to make this a static inline later. The cond_resched() |
197 | * currently makes this problematic. | ||
191 | */ | 198 | */ |
192 | void synchronize_sched(void) | 199 | void synchronize_sched(void) |
193 | { | 200 | { |
@@ -195,12 +202,6 @@ void synchronize_sched(void) | |||
195 | } | 202 | } |
196 | EXPORT_SYMBOL_GPL(synchronize_sched); | 203 | EXPORT_SYMBOL_GPL(synchronize_sched); |
197 | 204 | ||
198 | void synchronize_rcu_bh(void) | ||
199 | { | ||
200 | synchronize_sched(); | ||
201 | } | ||
202 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | ||
203 | |||
204 | /* | 205 | /* |
205 | * Helper function for call_rcu() and call_rcu_bh(). | 206 | * Helper function for call_rcu() and call_rcu_bh(). |
206 | */ | 207 | */ |
@@ -226,7 +227,7 @@ static void __call_rcu(struct rcu_head *head, | |||
226 | */ | 227 | */ |
227 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | 228 | void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) |
228 | { | 229 | { |
229 | __call_rcu(head, func, &rcu_ctrlblk); | 230 | __call_rcu(head, func, &rcu_sched_ctrlblk); |
230 | } | 231 | } |
231 | EXPORT_SYMBOL_GPL(call_rcu); | 232 | EXPORT_SYMBOL_GPL(call_rcu); |
232 | 233 | ||
@@ -244,11 +245,13 @@ void rcu_barrier(void) | |||
244 | { | 245 | { |
245 | struct rcu_synchronize rcu; | 246 | struct rcu_synchronize rcu; |
246 | 247 | ||
248 | init_rcu_head_on_stack(&rcu.head); | ||
247 | init_completion(&rcu.completion); | 249 | init_completion(&rcu.completion); |
248 | /* Will wake me after RCU finished. */ | 250 | /* Will wake me after RCU finished. */ |
249 | call_rcu(&rcu.head, wakeme_after_rcu); | 251 | call_rcu(&rcu.head, wakeme_after_rcu); |
250 | /* Wait for it. */ | 252 | /* Wait for it. */ |
251 | wait_for_completion(&rcu.completion); | 253 | wait_for_completion(&rcu.completion); |
254 | destroy_rcu_head_on_stack(&rcu.head); | ||
252 | } | 255 | } |
253 | EXPORT_SYMBOL_GPL(rcu_barrier); | 256 | EXPORT_SYMBOL_GPL(rcu_barrier); |
254 | 257 | ||
@@ -256,11 +259,13 @@ void rcu_barrier_bh(void) | |||
256 | { | 259 | { |
257 | struct rcu_synchronize rcu; | 260 | struct rcu_synchronize rcu; |
258 | 261 | ||
262 | init_rcu_head_on_stack(&rcu.head); | ||
259 | init_completion(&rcu.completion); | 263 | init_completion(&rcu.completion); |
260 | /* Will wake me after RCU finished. */ | 264 | /* Will wake me after RCU finished. */ |
261 | call_rcu_bh(&rcu.head, wakeme_after_rcu); | 265 | call_rcu_bh(&rcu.head, wakeme_after_rcu); |
262 | /* Wait for it. */ | 266 | /* Wait for it. */ |
263 | wait_for_completion(&rcu.completion); | 267 | wait_for_completion(&rcu.completion); |
268 | destroy_rcu_head_on_stack(&rcu.head); | ||
264 | } | 269 | } |
265 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); | 270 | EXPORT_SYMBOL_GPL(rcu_barrier_bh); |
266 | 271 | ||
@@ -268,11 +273,13 @@ void rcu_barrier_sched(void) | |||
268 | { | 273 | { |
269 | struct rcu_synchronize rcu; | 274 | struct rcu_synchronize rcu; |
270 | 275 | ||
276 | init_rcu_head_on_stack(&rcu.head); | ||
271 | init_completion(&rcu.completion); | 277 | init_completion(&rcu.completion); |
272 | /* Will wake me after RCU finished. */ | 278 | /* Will wake me after RCU finished. */ |
273 | call_rcu_sched(&rcu.head, wakeme_after_rcu); | 279 | call_rcu_sched(&rcu.head, wakeme_after_rcu); |
274 | /* Wait for it. */ | 280 | /* Wait for it. */ |
275 | wait_for_completion(&rcu.completion); | 281 | wait_for_completion(&rcu.completion); |
282 | destroy_rcu_head_on_stack(&rcu.head); | ||
276 | } | 283 | } |
277 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | 284 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); |
278 | 285 | ||
@@ -280,3 +287,5 @@ void __init rcu_init(void) | |||
280 | { | 287 | { |
281 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 288 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); |
282 | } | 289 | } |
290 | |||
291 | #include "rcutiny_plugin.h" | ||
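[Sketch] The init_rcu_head_on_stack()/destroy_rcu_head_on_stack() pairs added here (and in the rcutorture.c, rcutree.c and rcutree_plugin.h hunks below) bracket rcu_head structures that live on the stack, so that RCU-head object debugging, when enabled, knows the object was neither static nor heap-allocated; with that debugging off they compile away. The resulting wait-for-grace-period idiom, condensed:

    struct rcu_synchronize rcu;

    init_rcu_head_on_stack(&rcu.head);      /* announce the on-stack rcu_head */
    init_completion(&rcu.completion);
    call_rcu(&rcu.head, wakeme_after_rcu);  /* completes after a grace period */
    wait_for_completion(&rcu.completion);
    destroy_rcu_head_on_stack(&rcu.head);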
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h new file mode 100644 index 000000000000..d223a92bc742 --- /dev/null +++ b/kernel/rcutiny_plugin.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Read-Copy Update mechanism for mutual exclusion (tiny version) | ||
3 | * Internal non-public definitions that provide either classic | ||
4 | * or preemptable semantics. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | * Copyright IBM Corporation, 2009 | ||
21 | * | ||
22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | ||
23 | */ | ||
24 | |||
25 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
26 | |||
27 | #include <linux/kernel_stat.h> | ||
28 | |||
29 | /* | ||
30 | * During boot, we forgive RCU lockdep issues. After this function is | ||
31 | * invoked, we start taking RCU lockdep issues seriously. | ||
32 | */ | ||
33 | void rcu_scheduler_starting(void) | ||
34 | { | ||
35 | WARN_ON(nr_context_switches() > 0); | ||
36 | rcu_scheduler_active = 1; | ||
37 | } | ||
38 | |||
39 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 2b676f3a0f26..6535ac8bc6a5 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -464,9 +464,11 @@ static void rcu_bh_torture_synchronize(void) | |||
464 | { | 464 | { |
465 | struct rcu_bh_torture_synchronize rcu; | 465 | struct rcu_bh_torture_synchronize rcu; |
466 | 466 | ||
467 | init_rcu_head_on_stack(&rcu.head); | ||
467 | init_completion(&rcu.completion); | 468 | init_completion(&rcu.completion); |
468 | call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb); | 469 | call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb); |
469 | wait_for_completion(&rcu.completion); | 470 | wait_for_completion(&rcu.completion); |
471 | destroy_rcu_head_on_stack(&rcu.head); | ||
470 | } | 472 | } |
471 | 473 | ||
472 | static struct rcu_torture_ops rcu_bh_ops = { | 474 | static struct rcu_torture_ops rcu_bh_ops = { |
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 3ec8160fc75f..d4437345706f 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <linux/cpu.h> | 46 | #include <linux/cpu.h> |
47 | #include <linux/mutex.h> | 47 | #include <linux/mutex.h> |
48 | #include <linux/time.h> | 48 | #include <linux/time.h> |
49 | #include <linux/kernel_stat.h> | ||
49 | 50 | ||
50 | #include "rcutree.h" | 51 | #include "rcutree.h" |
51 | 52 | ||
@@ -53,8 +54,8 @@ | |||
53 | 54 | ||
54 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | 55 | static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; |
55 | 56 | ||
56 | #define RCU_STATE_INITIALIZER(name) { \ | 57 | #define RCU_STATE_INITIALIZER(structname) { \ |
57 | .level = { &name.node[0] }, \ | 58 | .level = { &structname.node[0] }, \ |
58 | .levelcnt = { \ | 59 | .levelcnt = { \ |
59 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ | 60 | NUM_RCU_LVL_0, /* root of hierarchy. */ \ |
60 | NUM_RCU_LVL_1, \ | 61 | NUM_RCU_LVL_1, \ |
@@ -65,13 +66,14 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | |||
65 | .signaled = RCU_GP_IDLE, \ | 66 | .signaled = RCU_GP_IDLE, \ |
66 | .gpnum = -300, \ | 67 | .gpnum = -300, \ |
67 | .completed = -300, \ | 68 | .completed = -300, \ |
68 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&name.onofflock), \ | 69 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ |
69 | .orphan_cbs_list = NULL, \ | 70 | .orphan_cbs_list = NULL, \ |
70 | .orphan_cbs_tail = &name.orphan_cbs_list, \ | 71 | .orphan_cbs_tail = &structname.orphan_cbs_list, \ |
71 | .orphan_qlen = 0, \ | 72 | .orphan_qlen = 0, \ |
72 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&name.fqslock), \ | 73 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ |
73 | .n_force_qs = 0, \ | 74 | .n_force_qs = 0, \ |
74 | .n_force_qs_ngp = 0, \ | 75 | .n_force_qs_ngp = 0, \ |
76 | .name = #structname, \ | ||
75 | } | 77 | } |
76 | 78 | ||
77 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); | 79 | struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); |
@@ -80,6 +82,9 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); | |||
80 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); | 82 | struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); |
81 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); | 83 | DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); |
82 | 84 | ||
85 | int rcu_scheduler_active __read_mostly; | ||
86 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | ||
87 | |||
83 | /* | 88 | /* |
84 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s | 89 | * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s |
85 | * permit this function to be invoked without holding the root rcu_node | 90 | * permit this function to be invoked without holding the root rcu_node |
@@ -97,25 +102,32 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) | |||
97 | */ | 102 | */ |
98 | void rcu_sched_qs(int cpu) | 103 | void rcu_sched_qs(int cpu) |
99 | { | 104 | { |
100 | struct rcu_data *rdp; | 105 | struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); |
101 | 106 | ||
102 | rdp = &per_cpu(rcu_sched_data, cpu); | ||
103 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 107 | rdp->passed_quiesc_completed = rdp->gpnum - 1; |
104 | barrier(); | 108 | barrier(); |
105 | rdp->passed_quiesc = 1; | 109 | rdp->passed_quiesc = 1; |
106 | rcu_preempt_note_context_switch(cpu); | ||
107 | } | 110 | } |
108 | 111 | ||
109 | void rcu_bh_qs(int cpu) | 112 | void rcu_bh_qs(int cpu) |
110 | { | 113 | { |
111 | struct rcu_data *rdp; | 114 | struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); |
112 | 115 | ||
113 | rdp = &per_cpu(rcu_bh_data, cpu); | ||
114 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 116 | rdp->passed_quiesc_completed = rdp->gpnum - 1; |
115 | barrier(); | 117 | barrier(); |
116 | rdp->passed_quiesc = 1; | 118 | rdp->passed_quiesc = 1; |
117 | } | 119 | } |
118 | 120 | ||
121 | /* | ||
122 | * Note a context switch. This is a quiescent state for RCU-sched, | ||
123 | * and requires special handling for preemptible RCU. | ||
124 | */ | ||
125 | void rcu_note_context_switch(int cpu) | ||
126 | { | ||
127 | rcu_sched_qs(cpu); | ||
128 | rcu_preempt_note_context_switch(cpu); | ||
129 | } | ||
130 | |||
119 | #ifdef CONFIG_NO_HZ | 131 | #ifdef CONFIG_NO_HZ |
120 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { | 132 | DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { |
121 | .dynticks_nesting = 1, | 133 | .dynticks_nesting = 1, |
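[Sketch] The hunk above narrows rcu_sched_qs() to just recording the quiescent state and introduces rcu_note_context_switch() as the single hook the scheduler calls; the sched.c and softirq.c hunks further down switch schedule() and ksoftirqd over to it, keeping the flavor-specific handling inside RCU. The split, condensed:

    void rcu_note_context_switch(int cpu)
    {
            rcu_sched_qs(cpu);                      /* RCU-sched quiescent state */
            rcu_preempt_note_context_switch(cpu);   /* preemptible-RCU bookkeeping */
    }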
@@ -438,6 +450,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) | |||
438 | 450 | ||
439 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | 451 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR |
440 | 452 | ||
453 | int rcu_cpu_stall_panicking __read_mostly; | ||
454 | |||
441 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 455 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
442 | { | 456 | { |
443 | rsp->gp_start = jiffies; | 457 | rsp->gp_start = jiffies; |
@@ -470,7 +484,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
470 | 484 | ||
471 | /* OK, time to rat on our buddy... */ | 485 | /* OK, time to rat on our buddy... */ |
472 | 486 | ||
473 | printk(KERN_ERR "INFO: RCU detected CPU stalls:"); | 487 | printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", |
488 | rsp->name); | ||
474 | rcu_for_each_leaf_node(rsp, rnp) { | 489 | rcu_for_each_leaf_node(rsp, rnp) { |
475 | raw_spin_lock_irqsave(&rnp->lock, flags); | 490 | raw_spin_lock_irqsave(&rnp->lock, flags); |
476 | rcu_print_task_stall(rnp); | 491 | rcu_print_task_stall(rnp); |
@@ -481,7 +496,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) | |||
481 | if (rnp->qsmask & (1UL << cpu)) | 496 | if (rnp->qsmask & (1UL << cpu)) |
482 | printk(" %d", rnp->grplo + cpu); | 497 | printk(" %d", rnp->grplo + cpu); |
483 | } | 498 | } |
484 | printk(" (detected by %d, t=%ld jiffies)\n", | 499 | printk("} (detected by %d, t=%ld jiffies)\n", |
485 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); | 500 | smp_processor_id(), (long)(jiffies - rsp->gp_start)); |
486 | trigger_all_cpu_backtrace(); | 501 | trigger_all_cpu_backtrace(); |
487 | 502 | ||
@@ -497,8 +512,8 @@ static void print_cpu_stall(struct rcu_state *rsp) | |||
497 | unsigned long flags; | 512 | unsigned long flags; |
498 | struct rcu_node *rnp = rcu_get_root(rsp); | 513 | struct rcu_node *rnp = rcu_get_root(rsp); |
499 | 514 | ||
500 | printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n", | 515 | printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", |
501 | smp_processor_id(), jiffies - rsp->gp_start); | 516 | rsp->name, smp_processor_id(), jiffies - rsp->gp_start); |
502 | trigger_all_cpu_backtrace(); | 517 | trigger_all_cpu_backtrace(); |
503 | 518 | ||
504 | raw_spin_lock_irqsave(&rnp->lock, flags); | 519 | raw_spin_lock_irqsave(&rnp->lock, flags); |
@@ -515,6 +530,8 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
515 | long delta; | 530 | long delta; |
516 | struct rcu_node *rnp; | 531 | struct rcu_node *rnp; |
517 | 532 | ||
533 | if (rcu_cpu_stall_panicking) | ||
534 | return; | ||
518 | delta = jiffies - rsp->jiffies_stall; | 535 | delta = jiffies - rsp->jiffies_stall; |
519 | rnp = rdp->mynode; | 536 | rnp = rdp->mynode; |
520 | if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { | 537 | if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { |
@@ -529,6 +546,21 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
529 | } | 546 | } |
530 | } | 547 | } |
531 | 548 | ||
549 | static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) | ||
550 | { | ||
551 | rcu_cpu_stall_panicking = 1; | ||
552 | return NOTIFY_DONE; | ||
553 | } | ||
554 | |||
555 | static struct notifier_block rcu_panic_block = { | ||
556 | .notifier_call = rcu_panic, | ||
557 | }; | ||
558 | |||
559 | static void __init check_cpu_stall_init(void) | ||
560 | { | ||
561 | atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); | ||
562 | } | ||
563 | |||
532 | #else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 564 | #else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
533 | 565 | ||
534 | static void record_gp_stall_check_time(struct rcu_state *rsp) | 566 | static void record_gp_stall_check_time(struct rcu_state *rsp) |
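[Sketch] Once the system has panicked, further RCU stall warnings would presumably only bury the report that matters, so the hunk above registers a panic notifier that sets rcu_cpu_stall_panicking, and check_cpu_stall() returns immediately whenever the flag is set. Shape of the mechanism, condensed from the hunk:

    static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
    {
            rcu_cpu_stall_panicking = 1;    /* silence the stall detector */
            return NOTIFY_DONE;
    }

    static struct notifier_block rcu_panic_block = {
            .notifier_call = rcu_panic,
    };

    /* registered once at boot, from rcu_init() via check_cpu_stall_init() */
    atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);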
@@ -539,6 +571,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) | |||
539 | { | 571 | { |
540 | } | 572 | } |
541 | 573 | ||
574 | static void __init check_cpu_stall_init(void) | ||
575 | { | ||
576 | } | ||
577 | |||
542 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 578 | #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
543 | 579 | ||
544 | /* | 580 | /* |
@@ -1125,8 +1161,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1125 | */ | 1161 | */ |
1126 | void rcu_check_callbacks(int cpu, int user) | 1162 | void rcu_check_callbacks(int cpu, int user) |
1127 | { | 1163 | { |
1128 | if (!rcu_pending(cpu)) | ||
1129 | return; /* if nothing for RCU to do. */ | ||
1130 | if (user || | 1164 | if (user || |
1131 | (idle_cpu(cpu) && rcu_scheduler_active && | 1165 | (idle_cpu(cpu) && rcu_scheduler_active && |
1132 | !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { | 1166 | !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { |
@@ -1158,7 +1192,8 @@ void rcu_check_callbacks(int cpu, int user) | |||
1158 | rcu_bh_qs(cpu); | 1192 | rcu_bh_qs(cpu); |
1159 | } | 1193 | } |
1160 | rcu_preempt_check_callbacks(cpu); | 1194 | rcu_preempt_check_callbacks(cpu); |
1161 | raise_softirq(RCU_SOFTIRQ); | 1195 | if (rcu_pending(cpu)) |
1196 | raise_softirq(RCU_SOFTIRQ); | ||
1162 | } | 1197 | } |
1163 | 1198 | ||
1164 | #ifdef CONFIG_SMP | 1199 | #ifdef CONFIG_SMP |
@@ -1236,11 +1271,11 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) | |||
1236 | break; /* grace period idle or initializing, ignore. */ | 1271 | break; /* grace period idle or initializing, ignore. */ |
1237 | 1272 | ||
1238 | case RCU_SAVE_DYNTICK: | 1273 | case RCU_SAVE_DYNTICK: |
1239 | |||
1240 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | ||
1241 | if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) | 1274 | if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) |
1242 | break; /* So gcc recognizes the dead code. */ | 1275 | break; /* So gcc recognizes the dead code. */ |
1243 | 1276 | ||
1277 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ | ||
1278 | |||
1244 | /* Record dyntick-idle state. */ | 1279 | /* Record dyntick-idle state. */ |
1245 | force_qs_rnp(rsp, dyntick_save_progress_counter); | 1280 | force_qs_rnp(rsp, dyntick_save_progress_counter); |
1246 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ | 1281 | raw_spin_lock(&rnp->lock); /* irqs already disabled */ |
@@ -1449,11 +1484,13 @@ void synchronize_sched(void) | |||
1449 | if (rcu_blocking_is_gp()) | 1484 | if (rcu_blocking_is_gp()) |
1450 | return; | 1485 | return; |
1451 | 1486 | ||
1487 | init_rcu_head_on_stack(&rcu.head); | ||
1452 | init_completion(&rcu.completion); | 1488 | init_completion(&rcu.completion); |
1453 | /* Will wake me after RCU finished. */ | 1489 | /* Will wake me after RCU finished. */ |
1454 | call_rcu_sched(&rcu.head, wakeme_after_rcu); | 1490 | call_rcu_sched(&rcu.head, wakeme_after_rcu); |
1455 | /* Wait for it. */ | 1491 | /* Wait for it. */ |
1456 | wait_for_completion(&rcu.completion); | 1492 | wait_for_completion(&rcu.completion); |
1493 | destroy_rcu_head_on_stack(&rcu.head); | ||
1457 | } | 1494 | } |
1458 | EXPORT_SYMBOL_GPL(synchronize_sched); | 1495 | EXPORT_SYMBOL_GPL(synchronize_sched); |
1459 | 1496 | ||
@@ -1473,11 +1510,13 @@ void synchronize_rcu_bh(void) | |||
1473 | if (rcu_blocking_is_gp()) | 1510 | if (rcu_blocking_is_gp()) |
1474 | return; | 1511 | return; |
1475 | 1512 | ||
1513 | init_rcu_head_on_stack(&rcu.head); | ||
1476 | init_completion(&rcu.completion); | 1514 | init_completion(&rcu.completion); |
1477 | /* Will wake me after RCU finished. */ | 1515 | /* Will wake me after RCU finished. */ |
1478 | call_rcu_bh(&rcu.head, wakeme_after_rcu); | 1516 | call_rcu_bh(&rcu.head, wakeme_after_rcu); |
1479 | /* Wait for it. */ | 1517 | /* Wait for it. */ |
1480 | wait_for_completion(&rcu.completion); | 1518 | wait_for_completion(&rcu.completion); |
1519 | destroy_rcu_head_on_stack(&rcu.head); | ||
1481 | } | 1520 | } |
1482 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); | 1521 | EXPORT_SYMBOL_GPL(synchronize_rcu_bh); |
1483 | 1522 | ||
@@ -1498,8 +1537,20 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1498 | check_cpu_stall(rsp, rdp); | 1537 | check_cpu_stall(rsp, rdp); |
1499 | 1538 | ||
1500 | /* Is the RCU core waiting for a quiescent state from this CPU? */ | 1539 | /* Is the RCU core waiting for a quiescent state from this CPU? */ |
1501 | if (rdp->qs_pending) { | 1540 | if (rdp->qs_pending && !rdp->passed_quiesc) { |
1541 | |||
1542 | /* | ||
1543 | * If force_quiescent_state() coming soon and this CPU | ||
1544 | * needs a quiescent state, and this is either RCU-sched | ||
1545 | * or RCU-bh, force a local reschedule. | ||
1546 | */ | ||
1502 | rdp->n_rp_qs_pending++; | 1547 | rdp->n_rp_qs_pending++; |
1548 | if (!rdp->preemptable && | ||
1549 | ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, | ||
1550 | jiffies)) | ||
1551 | set_need_resched(); | ||
1552 | } else if (rdp->qs_pending && rdp->passed_quiesc) { | ||
1553 | rdp->n_rp_report_qs++; | ||
1503 | return 1; | 1554 | return 1; |
1504 | } | 1555 | } |
1505 | 1556 | ||
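[Sketch] The __rcu_pending() rework above distinguishes a CPU that still owes a quiescent state from one that has already passed it: the former is merely nudged with set_need_resched() when force_quiescent_state() is about to run (and only for the non-preemptible flavors), while the latter reports work for the RCU core and bumps the new n_rp_report_qs counter (see the rcutree.h and rcutree_trace.c hunks below). The decision, condensed:

    if (rdp->qs_pending && !rdp->passed_quiesc) {
            rdp->n_rp_qs_pending++;
            if (!rdp->preemptable &&
                ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, jiffies))
                    set_need_resched();     /* help the overdue quiescent state along */
    } else if (rdp->qs_pending && rdp->passed_quiesc) {
            rdp->n_rp_report_qs++;
            return 1;                       /* the RCU core has work to do */
    }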
@@ -1767,6 +1818,21 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
1767 | } | 1818 | } |
1768 | 1819 | ||
1769 | /* | 1820 | /* |
1821 | * This function is invoked towards the end of the scheduler's initialization | ||
1822 | * process. Before this is called, the idle task might contain | ||
1823 | * RCU read-side critical sections (during which time, this idle | ||
1824 | * task is booting the system). After this function is called, the | ||
1825 | * idle tasks are prohibited from containing RCU read-side critical | ||
1826 | * sections. This function also enables RCU lockdep checking. | ||
1827 | */ | ||
1828 | void rcu_scheduler_starting(void) | ||
1829 | { | ||
1830 | WARN_ON(num_online_cpus() != 1); | ||
1831 | WARN_ON(nr_context_switches() > 0); | ||
1832 | rcu_scheduler_active = 1; | ||
1833 | } | ||
1834 | |||
1835 | /* | ||
1770 | * Compute the per-level fanout, either using the exact fanout specified | 1836 | * Compute the per-level fanout, either using the exact fanout specified |
1771 | * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. | 1837 | * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. |
1772 | */ | 1838 | */ |
@@ -1849,6 +1915,14 @@ static void __init rcu_init_one(struct rcu_state *rsp) | |||
1849 | INIT_LIST_HEAD(&rnp->blocked_tasks[3]); | 1915 | INIT_LIST_HEAD(&rnp->blocked_tasks[3]); |
1850 | } | 1916 | } |
1851 | } | 1917 | } |
1918 | |||
1919 | rnp = rsp->level[NUM_RCU_LVLS - 1]; | ||
1920 | for_each_possible_cpu(i) { | ||
1921 | while (i > rnp->grphi) | ||
1922 | rnp++; | ||
1923 | rsp->rda[i]->mynode = rnp; | ||
1924 | rcu_boot_init_percpu_data(i, rsp); | ||
1925 | } | ||
1852 | } | 1926 | } |
1853 | 1927 | ||
1854 | /* | 1928 | /* |
@@ -1859,19 +1933,11 @@ static void __init rcu_init_one(struct rcu_state *rsp) | |||
1859 | #define RCU_INIT_FLAVOR(rsp, rcu_data) \ | 1933 | #define RCU_INIT_FLAVOR(rsp, rcu_data) \ |
1860 | do { \ | 1934 | do { \ |
1861 | int i; \ | 1935 | int i; \ |
1862 | int j; \ | ||
1863 | struct rcu_node *rnp; \ | ||
1864 | \ | 1936 | \ |
1865 | rcu_init_one(rsp); \ | ||
1866 | rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \ | ||
1867 | j = 0; \ | ||
1868 | for_each_possible_cpu(i) { \ | 1937 | for_each_possible_cpu(i) { \ |
1869 | if (i > rnp[j].grphi) \ | ||
1870 | j++; \ | ||
1871 | per_cpu(rcu_data, i).mynode = &rnp[j]; \ | ||
1872 | (rsp)->rda[i] = &per_cpu(rcu_data, i); \ | 1938 | (rsp)->rda[i] = &per_cpu(rcu_data, i); \ |
1873 | rcu_boot_init_percpu_data(i, rsp); \ | ||
1874 | } \ | 1939 | } \ |
1940 | rcu_init_one(rsp); \ | ||
1875 | } while (0) | 1941 | } while (0) |
1876 | 1942 | ||
1877 | void __init rcu_init(void) | 1943 | void __init rcu_init(void) |
@@ -1879,12 +1945,6 @@ void __init rcu_init(void) | |||
1879 | int cpu; | 1945 | int cpu; |
1880 | 1946 | ||
1881 | rcu_bootup_announce(); | 1947 | rcu_bootup_announce(); |
1882 | #ifdef CONFIG_RCU_CPU_STALL_DETECTOR | ||
1883 | printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); | ||
1884 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | ||
1885 | #if NUM_RCU_LVL_4 != 0 | ||
1886 | printk(KERN_INFO "Experimental four-level hierarchy is enabled.\n"); | ||
1887 | #endif /* #if NUM_RCU_LVL_4 != 0 */ | ||
1888 | RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); | 1948 | RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); |
1889 | RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); | 1949 | RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); |
1890 | __rcu_init_preempt(); | 1950 | __rcu_init_preempt(); |
@@ -1898,6 +1958,7 @@ void __init rcu_init(void) | |||
1898 | cpu_notifier(rcu_cpu_notify, 0); | 1958 | cpu_notifier(rcu_cpu_notify, 0); |
1899 | for_each_online_cpu(cpu) | 1959 | for_each_online_cpu(cpu) |
1900 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); | 1960 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); |
1961 | check_cpu_stall_init(); | ||
1901 | } | 1962 | } |
1902 | 1963 | ||
1903 | #include "rcutree_plugin.h" | 1964 | #include "rcutree_plugin.h" |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 4a525a30e08e..14c040b18ed0 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -223,6 +223,7 @@ struct rcu_data { | |||
223 | /* 5) __rcu_pending() statistics. */ | 223 | /* 5) __rcu_pending() statistics. */ |
224 | unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ | 224 | unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ |
225 | unsigned long n_rp_qs_pending; | 225 | unsigned long n_rp_qs_pending; |
226 | unsigned long n_rp_report_qs; | ||
226 | unsigned long n_rp_cb_ready; | 227 | unsigned long n_rp_cb_ready; |
227 | unsigned long n_rp_cpu_needs_gp; | 228 | unsigned long n_rp_cpu_needs_gp; |
228 | unsigned long n_rp_gp_completed; | 229 | unsigned long n_rp_gp_completed; |
@@ -326,6 +327,7 @@ struct rcu_state { | |||
326 | unsigned long jiffies_stall; /* Time at which to check */ | 327 | unsigned long jiffies_stall; /* Time at which to check */ |
327 | /* for CPU stalls. */ | 328 | /* for CPU stalls. */ |
328 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ | 329 | #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ |
330 | char *name; /* Name of structure. */ | ||
329 | }; | 331 | }; |
330 | 332 | ||
331 | /* Return values for rcu_preempt_offline_tasks(). */ | 333 | /* Return values for rcu_preempt_offline_tasks(). */ |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 79b53bda8943..0e4f420245d9 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -26,6 +26,45 @@ | |||
26 | 26 | ||
27 | #include <linux/delay.h> | 27 | #include <linux/delay.h> |
28 | 28 | ||
29 | /* | ||
30 | * Check the RCU kernel configuration parameters and print informative | ||
31 | * messages about anything out of the ordinary. If you like #ifdef, you | ||
32 | * will love this function. | ||
33 | */ | ||
34 | static void __init rcu_bootup_announce_oddness(void) | ||
35 | { | ||
36 | #ifdef CONFIG_RCU_TRACE | ||
37 | printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n"); | ||
38 | #endif | ||
39 | #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) | ||
40 | printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n", | ||
41 | CONFIG_RCU_FANOUT); | ||
42 | #endif | ||
43 | #ifdef CONFIG_RCU_FANOUT_EXACT | ||
44 | printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n"); | ||
45 | #endif | ||
46 | #ifdef CONFIG_RCU_FAST_NO_HZ | ||
47 | printk(KERN_INFO | ||
48 | "\tRCU dyntick-idle grace-period acceleration is enabled.\n"); | ||
49 | #endif | ||
50 | #ifdef CONFIG_PROVE_RCU | ||
51 | printk(KERN_INFO "\tRCU lockdep checking is enabled.\n"); | ||
52 | #endif | ||
53 | #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE | ||
54 | printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); | ||
55 | #endif | ||
56 | #ifndef CONFIG_RCU_CPU_STALL_DETECTOR | ||
57 | printk(KERN_INFO | ||
58 | "\tRCU-based detection of stalled CPUs is disabled.\n"); | ||
59 | #endif | ||
60 | #ifndef CONFIG_RCU_CPU_STALL_VERBOSE | ||
61 | printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); | ||
62 | #endif | ||
63 | #if NUM_RCU_LVL_4 != 0 | ||
64 | printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); | ||
65 | #endif | ||
66 | } | ||
67 | |||
29 | #ifdef CONFIG_TREE_PREEMPT_RCU | 68 | #ifdef CONFIG_TREE_PREEMPT_RCU |
30 | 69 | ||
31 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); | 70 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); |
@@ -38,8 +77,8 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp); | |||
38 | */ | 77 | */ |
39 | static void __init rcu_bootup_announce(void) | 78 | static void __init rcu_bootup_announce(void) |
40 | { | 79 | { |
41 | printk(KERN_INFO | 80 | printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n"); |
42 | "Experimental preemptable hierarchical RCU implementation.\n"); | 81 | rcu_bootup_announce_oddness(); |
43 | } | 82 | } |
44 | 83 | ||
45 | /* | 84 | /* |
@@ -75,13 +114,19 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | |||
75 | * that this just means that the task currently running on the CPU is | 114 | * that this just means that the task currently running on the CPU is |
76 | * not in a quiescent state. There might be any number of tasks blocked | 115 | * not in a quiescent state. There might be any number of tasks blocked |
77 | * while in an RCU read-side critical section. | 116 | * while in an RCU read-side critical section. |
117 | * | ||
118 | * Unlike the other rcu_*_qs() functions, callers to this function | ||
119 | * must disable irqs in order to protect the assignment to | ||
120 | * ->rcu_read_unlock_special. | ||
78 | */ | 121 | */ |
79 | static void rcu_preempt_qs(int cpu) | 122 | static void rcu_preempt_qs(int cpu) |
80 | { | 123 | { |
81 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); | 124 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); |
125 | |||
82 | rdp->passed_quiesc_completed = rdp->gpnum - 1; | 126 | rdp->passed_quiesc_completed = rdp->gpnum - 1; |
83 | barrier(); | 127 | barrier(); |
84 | rdp->passed_quiesc = 1; | 128 | rdp->passed_quiesc = 1; |
129 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | ||
85 | } | 130 | } |
86 | 131 | ||
87 | /* | 132 | /* |
@@ -144,9 +189,8 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
144 | * grace period, then the fact that the task has been enqueued | 189 | * grace period, then the fact that the task has been enqueued |
145 | * means that we continue to block the current grace period. | 190 | * means that we continue to block the current grace period. |
146 | */ | 191 | */ |
147 | rcu_preempt_qs(cpu); | ||
148 | local_irq_save(flags); | 192 | local_irq_save(flags); |
149 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | 193 | rcu_preempt_qs(cpu); |
150 | local_irq_restore(flags); | 194 | local_irq_restore(flags); |
151 | } | 195 | } |
152 | 196 | ||
@@ -236,7 +280,6 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
236 | */ | 280 | */ |
237 | special = t->rcu_read_unlock_special; | 281 | special = t->rcu_read_unlock_special; |
238 | if (special & RCU_READ_UNLOCK_NEED_QS) { | 282 | if (special & RCU_READ_UNLOCK_NEED_QS) { |
239 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | ||
240 | rcu_preempt_qs(smp_processor_id()); | 283 | rcu_preempt_qs(smp_processor_id()); |
241 | } | 284 | } |
242 | 285 | ||
@@ -473,7 +516,6 @@ static void rcu_preempt_check_callbacks(int cpu) | |||
473 | struct task_struct *t = current; | 516 | struct task_struct *t = current; |
474 | 517 | ||
475 | if (t->rcu_read_lock_nesting == 0) { | 518 | if (t->rcu_read_lock_nesting == 0) { |
476 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | ||
477 | rcu_preempt_qs(cpu); | 519 | rcu_preempt_qs(cpu); |
478 | return; | 520 | return; |
479 | } | 521 | } |
@@ -515,11 +557,13 @@ void synchronize_rcu(void) | |||
515 | if (!rcu_scheduler_active) | 557 | if (!rcu_scheduler_active) |
516 | return; | 558 | return; |
517 | 559 | ||
560 | init_rcu_head_on_stack(&rcu.head); | ||
518 | init_completion(&rcu.completion); | 561 | init_completion(&rcu.completion); |
519 | /* Will wake me after RCU finished. */ | 562 | /* Will wake me after RCU finished. */ |
520 | call_rcu(&rcu.head, wakeme_after_rcu); | 563 | call_rcu(&rcu.head, wakeme_after_rcu); |
521 | /* Wait for it. */ | 564 | /* Wait for it. */ |
522 | wait_for_completion(&rcu.completion); | 565 | wait_for_completion(&rcu.completion); |
566 | destroy_rcu_head_on_stack(&rcu.head); | ||
523 | } | 567 | } |
524 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 568 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
525 | 569 | ||
@@ -754,6 +798,7 @@ void exit_rcu(void) | |||
754 | static void __init rcu_bootup_announce(void) | 798 | static void __init rcu_bootup_announce(void) |
755 | { | 799 | { |
756 | printk(KERN_INFO "Hierarchical RCU implementation.\n"); | 800 | printk(KERN_INFO "Hierarchical RCU implementation.\n"); |
801 | rcu_bootup_announce_oddness(); | ||
757 | } | 802 | } |
758 | 803 | ||
759 | /* | 804 | /* |
@@ -1008,6 +1053,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); | |||
1008 | int rcu_needs_cpu(int cpu) | 1053 | int rcu_needs_cpu(int cpu) |
1009 | { | 1054 | { |
1010 | int c = 0; | 1055 | int c = 0; |
1056 | int snap; | ||
1057 | int snap_nmi; | ||
1011 | int thatcpu; | 1058 | int thatcpu; |
1012 | 1059 | ||
1013 | /* Check for being in the holdoff period. */ | 1060 | /* Check for being in the holdoff period. */ |
@@ -1015,12 +1062,18 @@ int rcu_needs_cpu(int cpu) | |||
1015 | return rcu_needs_cpu_quick_check(cpu); | 1062 | return rcu_needs_cpu_quick_check(cpu); |
1016 | 1063 | ||
1017 | /* Don't bother unless we are the last non-dyntick-idle CPU. */ | 1064 | /* Don't bother unless we are the last non-dyntick-idle CPU. */ |
1018 | for_each_cpu_not(thatcpu, nohz_cpu_mask) | 1065 | for_each_online_cpu(thatcpu) { |
1019 | if (thatcpu != cpu) { | 1066 | if (thatcpu == cpu) |
1067 | continue; | ||
1068 | snap = per_cpu(rcu_dynticks, thatcpu).dynticks; | ||
1069 | snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi; | ||
1070 | smp_mb(); /* Order sampling of snap with end of grace period. */ | ||
1071 | if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) { | ||
1020 | per_cpu(rcu_dyntick_drain, cpu) = 0; | 1072 | per_cpu(rcu_dyntick_drain, cpu) = 0; |
1021 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; | 1073 | per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; |
1022 | return rcu_needs_cpu_quick_check(cpu); | 1074 | return rcu_needs_cpu_quick_check(cpu); |
1023 | } | 1075 | } |
1076 | } | ||
1024 | 1077 | ||
1025 | /* Check and update the rcu_dyntick_drain sequencing. */ | 1078 | /* Check and update the rcu_dyntick_drain sequencing. */ |
1026 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { | 1079 | if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { |
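[Sketch] The rcu_needs_cpu() change above stops consulting nohz_cpu_mask and instead samples every other online CPU's dynticks counters directly; in this implementation an odd counter value means that CPU is not in dyntick-idle (or is inside an NMI), so some other CPU is still active and this one can take the quick-check path. Condensed, with the holdoff bookkeeping omitted:

    for_each_online_cpu(thatcpu) {
            if (thatcpu == cpu)
                    continue;
            snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
            snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
            smp_mb();       /* order sampling of snap with end of grace period */
            if ((snap & 0x1) || (snap_nmi & 0x1))
                    return rcu_needs_cpu_quick_check(cpu); /* not the last busy CPU */
    }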
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index d45db2e35d27..36c95b45738e 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -241,11 +241,13 @@ static const struct file_operations rcugp_fops = { | |||
241 | static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) | 241 | static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) |
242 | { | 242 | { |
243 | seq_printf(m, "%3d%cnp=%ld " | 243 | seq_printf(m, "%3d%cnp=%ld " |
244 | "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n", | 244 | "qsp=%ld rpq=%ld cbr=%ld cng=%ld " |
245 | "gpc=%ld gps=%ld nf=%ld nn=%ld\n", | ||
245 | rdp->cpu, | 246 | rdp->cpu, |
246 | cpu_is_offline(rdp->cpu) ? '!' : ' ', | 247 | cpu_is_offline(rdp->cpu) ? '!' : ' ', |
247 | rdp->n_rcu_pending, | 248 | rdp->n_rcu_pending, |
248 | rdp->n_rp_qs_pending, | 249 | rdp->n_rp_qs_pending, |
250 | rdp->n_rp_report_qs, | ||
249 | rdp->n_rp_cb_ready, | 251 | rdp->n_rp_cb_ready, |
250 | rdp->n_rp_cpu_needs_gp, | 252 | rdp->n_rp_cpu_needs_gp, |
251 | rdp->n_rp_gp_completed, | 253 | rdp->n_rp_gp_completed, |
diff --git a/kernel/sched.c b/kernel/sched.c index 78554dd0d1a4..1d93cd0ae4d3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3608,7 +3608,7 @@ need_resched: | |||
3608 | preempt_disable(); | 3608 | preempt_disable(); |
3609 | cpu = smp_processor_id(); | 3609 | cpu = smp_processor_id(); |
3610 | rq = cpu_rq(cpu); | 3610 | rq = cpu_rq(cpu); |
3611 | rcu_sched_qs(cpu); | 3611 | rcu_note_context_switch(cpu); |
3612 | prev = rq->curr; | 3612 | prev = rq->curr; |
3613 | switch_count = &prev->nivcsw; | 3613 | switch_count = &prev->nivcsw; |
3614 | 3614 | ||
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 9cf1baf6616a..87a330a7185f 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -114,7 +114,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) | |||
114 | { | 114 | { |
115 | char path[64]; | 115 | char path[64]; |
116 | 116 | ||
117 | rcu_read_lock(); | ||
117 | cgroup_path(task_group(p)->css.cgroup, path, sizeof(path)); | 118 | cgroup_path(task_group(p)->css.cgroup, path, sizeof(path)); |
119 | rcu_read_unlock(); | ||
118 | SEQ_printf(m, " %s", path); | 120 | SEQ_printf(m, " %s", path); |
119 | } | 121 | } |
120 | #endif | 122 | #endif |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 7c1a67ef0274..0db913a5c60f 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -716,7 +716,7 @@ static int run_ksoftirqd(void * __bind_cpu) | |||
716 | preempt_enable_no_resched(); | 716 | preempt_enable_no_resched(); |
717 | cond_resched(); | 717 | cond_resched(); |
718 | preempt_disable(); | 718 | preempt_disable(); |
719 | rcu_sched_qs((long)__bind_cpu); | 719 | rcu_note_context_switch((long)__bind_cpu); |
720 | } | 720 | } |
721 | preempt_enable(); | 721 | preempt_enable(); |
722 | set_current_state(TASK_INTERRUPTIBLE); | 722 | set_current_state(TASK_INTERRUPTIBLE); |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index ef51d1fcf5e6..b4e7431e7c78 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -294,7 +294,6 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
294 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; | 294 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; |
295 | unsigned int cpu = (unsigned long)hcpu; | 295 | unsigned int cpu = (unsigned long)hcpu; |
296 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | 296 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); |
297 | struct cpu_stop_work *work; | ||
298 | struct task_struct *p; | 297 | struct task_struct *p; |
299 | 298 | ||
300 | switch (action & ~CPU_TASKS_FROZEN) { | 299 | switch (action & ~CPU_TASKS_FROZEN) { |
@@ -323,6 +322,9 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
323 | #ifdef CONFIG_HOTPLUG_CPU | 322 | #ifdef CONFIG_HOTPLUG_CPU |
324 | case CPU_UP_CANCELED: | 323 | case CPU_UP_CANCELED: |
325 | case CPU_DEAD: | 324 | case CPU_DEAD: |
325 | { | ||
326 | struct cpu_stop_work *work; | ||
327 | |||
326 | /* kill the stopper */ | 328 | /* kill the stopper */ |
327 | kthread_stop(stopper->thread); | 329 | kthread_stop(stopper->thread); |
328 | /* drain remaining works */ | 330 | /* drain remaining works */ |
@@ -335,6 +337,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
335 | put_task_struct(stopper->thread); | 337 | put_task_struct(stopper->thread); |
336 | stopper->thread = NULL; | 338 | stopper->thread = NULL; |
337 | break; | 339 | break; |
340 | } | ||
338 | #endif | 341 | #endif |
339 | } | 342 | } |
340 | 343 | ||
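The stop_machine.c change is mechanical: cpu_stop_work is needed only in the hotplug teardown path, so its declaration moves from function scope into the CPU_UP_CANCELED/CPU_DEAD case, which in C requires an explicit block inside the switch. Stripped to its shape (only cpu_stop_work and the case labels are taken from the hunk):

	switch (action & ~CPU_TASKS_FROZEN) {
	case CPU_UP_CANCELED:
	case CPU_DEAD:
	{
		struct cpu_stop_work *work;	/* scoped to this case only */

		/* ... drain stopper->works using 'work' ... */
		break;
	}
	}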
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 0a47e8d6b491..26b8607a0abc 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c | |||
@@ -9,13 +9,9 @@ | |||
9 | #include <linux/kprobes.h> | 9 | #include <linux/kprobes.h> |
10 | #include "trace.h" | 10 | #include "trace.h" |
11 | 11 | ||
12 | DEFINE_PER_CPU(struct pt_regs, perf_trace_regs); | ||
13 | EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs); | ||
14 | |||
15 | EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); | 12 | EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); |
16 | 13 | ||
17 | static char *perf_trace_buf; | 14 | static char *perf_trace_buf[4]; |
18 | static char *perf_trace_buf_nmi; | ||
19 | 15 | ||
20 | /* | 16 | /* |
21 | * Force it to be aligned to unsigned long to avoid misaligned accesses | 17 | * Force it to be aligned to unsigned long to avoid misaligned accesses |
@@ -27,63 +23,82 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)]) | |||
27 | /* Count the events in use (per event id, not per instance) */ | 23 | /* Count the events in use (per event id, not per instance) */ |
28 | static int total_ref_count; | 24 | static int total_ref_count; |
29 | 25 | ||
30 | static int perf_trace_event_enable(struct ftrace_event_call *event) | 26 | static int perf_trace_event_init(struct ftrace_event_call *tp_event, |
27 | struct perf_event *p_event) | ||
31 | { | 28 | { |
32 | char *buf; | 29 | struct hlist_head *list; |
33 | int ret = -ENOMEM; | 30 | int ret = -ENOMEM; |
31 | int cpu; | ||
34 | 32 | ||
35 | if (event->perf_refcount++ > 0) | 33 | p_event->tp_event = tp_event; |
34 | if (tp_event->perf_refcount++ > 0) | ||
36 | return 0; | 35 | return 0; |
37 | 36 | ||
38 | if (!total_ref_count) { | 37 | list = alloc_percpu(struct hlist_head); |
39 | buf = (char *)alloc_percpu(perf_trace_t); | 38 | if (!list) |
40 | if (!buf) | 39 | goto fail; |
41 | goto fail_buf; | 40 | |
41 | for_each_possible_cpu(cpu) | ||
42 | INIT_HLIST_HEAD(per_cpu_ptr(list, cpu)); | ||
42 | 43 | ||
43 | rcu_assign_pointer(perf_trace_buf, buf); | 44 | tp_event->perf_events = list; |
45 | |||
46 | if (!total_ref_count) { | ||
47 | char *buf; | ||
48 | int i; | ||
44 | 49 | ||
45 | buf = (char *)alloc_percpu(perf_trace_t); | 50 | for (i = 0; i < 4; i++) { |
46 | if (!buf) | 51 | buf = (char *)alloc_percpu(perf_trace_t); |
47 | goto fail_buf_nmi; | 52 | if (!buf) |
53 | goto fail; | ||
48 | 54 | ||
49 | rcu_assign_pointer(perf_trace_buf_nmi, buf); | 55 | perf_trace_buf[i] = buf; |
56 | } | ||
50 | } | 57 | } |
51 | 58 | ||
52 | if (event->class->reg) | 59 | if (tp_event->class->reg) |
53 | ret = event->class->reg(event, TRACE_REG_PERF_REGISTER); | 60 | ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER); |
54 | else | 61 | else |
55 | ret = tracepoint_probe_register(event->name, | 62 | ret = tracepoint_probe_register(tp_event->name, |
56 | event->class->perf_probe, | 63 | tp_event->class->perf_probe, |
57 | event); | 64 | tp_event); |
58 | if (!ret) { | 65 | |
59 | total_ref_count++; | 66 | if (ret) |
60 | return 0; | 67 | goto fail; |
61 | } | ||
62 | 68 | ||
63 | fail_buf_nmi: | 69 | total_ref_count++; |
70 | return 0; | ||
71 | |||
72 | fail: | ||
64 | if (!total_ref_count) { | 73 | if (!total_ref_count) { |
65 | free_percpu(perf_trace_buf_nmi); | 74 | int i; |
66 | free_percpu(perf_trace_buf); | 75 | |
67 | perf_trace_buf_nmi = NULL; | 76 | for (i = 0; i < 4; i++) { |
68 | perf_trace_buf = NULL; | 77 | free_percpu(perf_trace_buf[i]); |
78 | perf_trace_buf[i] = NULL; | ||
79 | } | ||
80 | } | ||
81 | |||
82 | if (!--tp_event->perf_refcount) { | ||
83 | free_percpu(tp_event->perf_events); | ||
84 | tp_event->perf_events = NULL; | ||
69 | } | 85 | } |
70 | fail_buf: | ||
71 | event->perf_refcount--; | ||
72 | 86 | ||
73 | return ret; | 87 | return ret; |
74 | } | 88 | } |
75 | 89 | ||
76 | int perf_trace_enable(int event_id) | 90 | int perf_trace_init(struct perf_event *p_event) |
77 | { | 91 | { |
78 | struct ftrace_event_call *event; | 92 | struct ftrace_event_call *tp_event; |
93 | int event_id = p_event->attr.config; | ||
79 | int ret = -EINVAL; | 94 | int ret = -EINVAL; |
80 | 95 | ||
81 | mutex_lock(&event_mutex); | 96 | mutex_lock(&event_mutex); |
82 | list_for_each_entry(event, &ftrace_events, list) { | 97 | list_for_each_entry(tp_event, &ftrace_events, list) { |
83 | if (event->event.type == event_id && | 98 | if (tp_event->event.type == event_id && |
84 | event->class && event->class->perf_probe && | 99 | tp_event->class && tp_event->class->perf_probe && |
85 | try_module_get(event->mod)) { | 100 | try_module_get(tp_event->mod)) { |
86 | ret = perf_trace_event_enable(event); | 101 | ret = perf_trace_event_init(tp_event, p_event); |
87 | break; | 102 | break; |
88 | } | 103 | } |
89 | } | 104 | } |
@@ -92,93 +107,76 @@ int perf_trace_enable(int event_id) | |||
92 | return ret; | 107 | return ret; |
93 | } | 108 | } |
94 | 109 | ||
95 | static void perf_trace_event_disable(struct ftrace_event_call *event) | 110 | int perf_trace_enable(struct perf_event *p_event) |
96 | { | 111 | { |
97 | char *buf, *nmi_buf; | 112 | struct ftrace_event_call *tp_event = p_event->tp_event; |
98 | 113 | struct hlist_head *list; | |
99 | if (--event->perf_refcount > 0) | ||
100 | return; | ||
101 | |||
102 | if (event->class->reg) | ||
103 | event->class->reg(event, TRACE_REG_PERF_UNREGISTER); | ||
104 | else | ||
105 | tracepoint_probe_unregister(event->name, event->class->perf_probe, event); | ||
106 | 114 | ||
107 | if (!--total_ref_count) { | 115 | list = tp_event->perf_events; |
108 | buf = perf_trace_buf; | 116 | if (WARN_ON_ONCE(!list)) |
109 | rcu_assign_pointer(perf_trace_buf, NULL); | 117 | return -EINVAL; |
110 | 118 | ||
111 | nmi_buf = perf_trace_buf_nmi; | 119 | list = per_cpu_ptr(list, smp_processor_id()); |
112 | rcu_assign_pointer(perf_trace_buf_nmi, NULL); | 120 | hlist_add_head_rcu(&p_event->hlist_entry, list); |
113 | 121 | ||
114 | /* | 122 | return 0; |
115 | * Ensure every events in profiling have finished before | 123 | } |
116 | * releasing the buffers | ||
117 | */ | ||
118 | synchronize_sched(); | ||
119 | 124 | ||
120 | free_percpu(buf); | 125 | void perf_trace_disable(struct perf_event *p_event) |
121 | free_percpu(nmi_buf); | 126 | { |
122 | } | 127 | hlist_del_rcu(&p_event->hlist_entry); |
123 | } | 128 | } |
124 | 129 | ||
125 | void perf_trace_disable(int event_id) | 130 | void perf_trace_destroy(struct perf_event *p_event) |
126 | { | 131 | { |
127 | struct ftrace_event_call *event; | 132 | struct ftrace_event_call *tp_event = p_event->tp_event; |
133 | int i; | ||
128 | 134 | ||
129 | mutex_lock(&event_mutex); | 135 | if (--tp_event->perf_refcount > 0) |
130 | list_for_each_entry(event, &ftrace_events, list) { | 136 | return; |
131 | if (event->event.type == event_id) { | 137 | |
132 | perf_trace_event_disable(event); | 138 | if (tp_event->class->reg) |
133 | module_put(event->mod); | 139 | tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); |
134 | break; | 140 | else |
141 | tracepoint_probe_unregister(tp_event->name, | ||
142 | tp_event->class->perf_probe, | ||
143 | tp_event); | ||
144 | |||
145 | free_percpu(tp_event->perf_events); | ||
146 | tp_event->perf_events = NULL; | ||
147 | |||
148 | if (!--total_ref_count) { | ||
149 | for (i = 0; i < 4; i++) { | ||
150 | free_percpu(perf_trace_buf[i]); | ||
151 | perf_trace_buf[i] = NULL; | ||
135 | } | 152 | } |
136 | } | 153 | } |
137 | mutex_unlock(&event_mutex); | ||
138 | } | 154 | } |
139 | 155 | ||
140 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, | 156 | __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, |
141 | int *rctxp, unsigned long *irq_flags) | 157 | struct pt_regs *regs, int *rctxp) |
142 | { | 158 | { |
143 | struct trace_entry *entry; | 159 | struct trace_entry *entry; |
144 | char *trace_buf, *raw_data; | 160 | char *raw_data; |
145 | int pc, cpu; | 161 | int pc; |
146 | 162 | ||
147 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); | 163 | BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); |
148 | 164 | ||
149 | pc = preempt_count(); | 165 | pc = preempt_count(); |
150 | 166 | ||
151 | /* Protect the per cpu buffer, begin the rcu read side */ | ||
152 | local_irq_save(*irq_flags); | ||
153 | |||
154 | *rctxp = perf_swevent_get_recursion_context(); | 167 | *rctxp = perf_swevent_get_recursion_context(); |
155 | if (*rctxp < 0) | 168 | if (*rctxp < 0) |
156 | goto err_recursion; | 169 | return NULL; |
157 | |||
158 | cpu = smp_processor_id(); | ||
159 | |||
160 | if (in_nmi()) | ||
161 | trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); | ||
162 | else | ||
163 | trace_buf = rcu_dereference_sched(perf_trace_buf); | ||
164 | |||
165 | if (!trace_buf) | ||
166 | goto err; | ||
167 | 170 | ||
168 | raw_data = per_cpu_ptr(trace_buf, cpu); | 171 | raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id()); |
169 | 172 | ||
170 | /* zero the dead bytes from align to not leak stack to user */ | 173 | /* zero the dead bytes from align to not leak stack to user */ |
171 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); | 174 | memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); |
172 | 175 | ||
173 | entry = (struct trace_entry *)raw_data; | 176 | entry = (struct trace_entry *)raw_data; |
174 | tracing_generic_entry_update(entry, *irq_flags, pc); | 177 | tracing_generic_entry_update(entry, regs->flags, pc); |
175 | entry->type = type; | 178 | entry->type = type; |
176 | 179 | ||
177 | return raw_data; | 180 | return raw_data; |
178 | err: | ||
179 | perf_swevent_put_recursion_context(*rctxp); | ||
180 | err_recursion: | ||
181 | local_irq_restore(*irq_flags); | ||
182 | return NULL; | ||
183 | } | 181 | } |
184 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); | 182 | EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); |
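Two structural changes in trace_event_perf.c drive the remaining hunks. First, the single perf_trace_buf / perf_trace_buf_nmi pair becomes an array of four per-CPU scratch buffers indexed by the recursion context returned from perf_swevent_get_recursion_context(), so nested contexts (NMI, hardirq, softirq, task) each write to their own slot and the old local_irq_save()/RCU protection around the buffer can go away; interrupt state for the record now comes from regs->flags. Second, each ftrace_event_call keeps a per-CPU hlist of the perf events attached to it, populated in perf_trace_enable() and freed in perf_trace_destroy(), so submission only has to consider events active on the local CPU. The trace_kprobe.c and trace_syscalls.c hunks below convert their handlers to the resulting calling convention; a stripped-down sketch, with hypothetical event and entry names:

	static void my_perf_probe(struct my_entry_args *args, struct pt_regs *regs)
	{
		struct ftrace_event_call *call = &my_event_call;	/* hypothetical event */
		struct my_trace_entry *entry;				/* hypothetical record */
		struct hlist_head *head;
		int size, rctx;

		size = ALIGN(sizeof(*entry) + sizeof(u32), sizeof(u64)) - sizeof(u32);

		entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
		if (!entry)
			return;		/* recursion detected, nothing to do */

		/* fill the event-specific payload of 'entry' here */

		head = per_cpu_ptr(call->perf_events, smp_processor_id());
		perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head);
	}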
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 9a082bba9537..faf7cefd15da 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c | |||
@@ -1338,9 +1338,9 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
1338 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); | 1338 | struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); |
1339 | struct ftrace_event_call *call = &tp->call; | 1339 | struct ftrace_event_call *call = &tp->call; |
1340 | struct kprobe_trace_entry_head *entry; | 1340 | struct kprobe_trace_entry_head *entry; |
1341 | struct hlist_head *head; | ||
1341 | u8 *data; | 1342 | u8 *data; |
1342 | int size, __size, i; | 1343 | int size, __size, i; |
1343 | unsigned long irq_flags; | ||
1344 | int rctx; | 1344 | int rctx; |
1345 | 1345 | ||
1346 | __size = sizeof(*entry) + tp->size; | 1346 | __size = sizeof(*entry) + tp->size; |
@@ -1350,8 +1350,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
1350 | "profile buffer not large enough")) | 1350 | "profile buffer not large enough")) |
1351 | return; | 1351 | return; |
1352 | 1352 | ||
1353 | entry = perf_trace_buf_prepare(size, call->event.type, | 1353 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); |
1354 | &rctx, &irq_flags); | ||
1355 | if (!entry) | 1354 | if (!entry) |
1356 | return; | 1355 | return; |
1357 | 1356 | ||
@@ -1360,7 +1359,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp, | |||
1360 | for (i = 0; i < tp->nr_args; i++) | 1359 | for (i = 0; i < tp->nr_args; i++) |
1361 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); | 1360 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); |
1362 | 1361 | ||
1363 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs); | 1362 | head = per_cpu_ptr(call->perf_events, smp_processor_id()); |
1363 | perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); | ||
1364 | } | 1364 | } |
1365 | 1365 | ||
1366 | /* Kretprobe profile handler */ | 1366 | /* Kretprobe profile handler */ |
@@ -1370,9 +1370,9 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1370 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); | 1370 | struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); |
1371 | struct ftrace_event_call *call = &tp->call; | 1371 | struct ftrace_event_call *call = &tp->call; |
1372 | struct kretprobe_trace_entry_head *entry; | 1372 | struct kretprobe_trace_entry_head *entry; |
1373 | struct hlist_head *head; | ||
1373 | u8 *data; | 1374 | u8 *data; |
1374 | int size, __size, i; | 1375 | int size, __size, i; |
1375 | unsigned long irq_flags; | ||
1376 | int rctx; | 1376 | int rctx; |
1377 | 1377 | ||
1378 | __size = sizeof(*entry) + tp->size; | 1378 | __size = sizeof(*entry) + tp->size; |
@@ -1382,8 +1382,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1382 | "profile buffer not large enough")) | 1382 | "profile buffer not large enough")) |
1383 | return; | 1383 | return; |
1384 | 1384 | ||
1385 | entry = perf_trace_buf_prepare(size, call->event.type, | 1385 | entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); |
1386 | &rctx, &irq_flags); | ||
1387 | if (!entry) | 1386 | if (!entry) |
1388 | return; | 1387 | return; |
1389 | 1388 | ||
@@ -1393,8 +1392,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri, | |||
1393 | for (i = 0; i < tp->nr_args; i++) | 1392 | for (i = 0; i < tp->nr_args; i++) |
1394 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); | 1393 | call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); |
1395 | 1394 | ||
1396 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, | 1395 | head = per_cpu_ptr(call->perf_events, smp_processor_id()); |
1397 | irq_flags, regs); | 1396 | perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); |
1398 | } | 1397 | } |
1399 | 1398 | ||
1400 | static int probe_perf_enable(struct ftrace_event_call *call) | 1399 | static int probe_perf_enable(struct ftrace_event_call *call) |
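On the delivery side (not shown in this kernel/trace diff), the head handed to perf_trace_buf_submit() is presumably the same per-CPU list that perf_trace_enable() populated with hlist_add_head_rcu(), so the perf core only needs an RCU walk over the locally attached events rather than a lookup by event id. A sketch of the assumed consumption pattern, using the node-carrying hlist iterator of this kernel generation:

	struct perf_event *p_event;
	struct hlist_node *node;

	hlist_for_each_entry_rcu(p_event, node, head, hlist_entry) {
		/* hand the just-built record to each attached event */
	}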
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 9d358301ae3e..d2c859cec9ea 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -488,7 +488,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
488 | { | 488 | { |
489 | struct syscall_metadata *sys_data; | 489 | struct syscall_metadata *sys_data; |
490 | struct syscall_trace_enter *rec; | 490 | struct syscall_trace_enter *rec; |
491 | unsigned long flags; | 491 | struct hlist_head *head; |
492 | int syscall_nr; | 492 | int syscall_nr; |
493 | int rctx; | 493 | int rctx; |
494 | int size; | 494 | int size; |
@@ -511,15 +511,16 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
511 | return; | 511 | return; |
512 | 512 | ||
513 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, | 513 | rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, |
514 | sys_data->enter_event->event.type, | 514 | sys_data->enter_event->event.type, regs, &rctx); |
515 | &rctx, &flags); | ||
516 | if (!rec) | 515 | if (!rec) |
517 | return; | 516 | return; |
518 | 517 | ||
519 | rec->nr = syscall_nr; | 518 | rec->nr = syscall_nr; |
520 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, | 519 | syscall_get_arguments(current, regs, 0, sys_data->nb_args, |
521 | (unsigned long *)&rec->args); | 520 | (unsigned long *)&rec->args); |
522 | perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); | 521 | |
522 | head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id()); | ||
523 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); | ||
523 | } | 524 | } |
524 | 525 | ||
525 | int perf_sysenter_enable(struct ftrace_event_call *call) | 526 | int perf_sysenter_enable(struct ftrace_event_call *call) |
@@ -561,7 +562,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
561 | { | 562 | { |
562 | struct syscall_metadata *sys_data; | 563 | struct syscall_metadata *sys_data; |
563 | struct syscall_trace_exit *rec; | 564 | struct syscall_trace_exit *rec; |
564 | unsigned long flags; | 565 | struct hlist_head *head; |
565 | int syscall_nr; | 566 | int syscall_nr; |
566 | int rctx; | 567 | int rctx; |
567 | int size; | 568 | int size; |
@@ -587,15 +588,15 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
587 | return; | 588 | return; |
588 | 589 | ||
589 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, | 590 | rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, |
590 | sys_data->exit_event->event.type, | 591 | sys_data->exit_event->event.type, regs, &rctx); |
591 | &rctx, &flags); | ||
592 | if (!rec) | 592 | if (!rec) |
593 | return; | 593 | return; |
594 | 594 | ||
595 | rec->nr = syscall_nr; | 595 | rec->nr = syscall_nr; |
596 | rec->ret = syscall_get_return_value(current, regs); | 596 | rec->ret = syscall_get_return_value(current, regs); |
597 | 597 | ||
598 | perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs); | 598 | head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id()); |
599 | perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); | ||
599 | } | 600 | } |
600 | 601 | ||
601 | int perf_sysexit_enable(struct ftrace_event_call *call) | 602 | int perf_sysexit_enable(struct ftrace_event_call *call) |