Diffstat (limited to 'kernel')

 -rw-r--r--  kernel/Kconfig.locks               9
 -rw-r--r--  kernel/cgroup.c                   58
 -rw-r--r--  kernel/cpuset.c                   20
 -rw-r--r--  kernel/events/core.c              34
 -rw-r--r--  kernel/kprobes.c                  14
 -rw-r--r--  kernel/locking/mcs_spinlock.c     64
 -rw-r--r--  kernel/locking/mcs_spinlock.h      9
 -rw-r--r--  kernel/locking/mutex.c             2
 -rw-r--r--  kernel/locking/rwsem-spinlock.c   28
 -rw-r--r--  kernel/locking/rwsem-xadd.c       16
 -rw-r--r--  kernel/locking/rwsem.c             2
 -rw-r--r--  kernel/power/process.c             1
 -rw-r--r--  kernel/power/suspend.c             4
 -rw-r--r--  kernel/rcu/tree.c                140
 -rw-r--r--  kernel/rcu/tree.h                  6
 -rw-r--r--  kernel/rcu/tree_plugin.h           2
 -rw-r--r--  kernel/rcu/update.c               22
 -rw-r--r--  kernel/sched/core.c                7
 -rw-r--r--  kernel/sched/debug.c               2
 -rw-r--r--  kernel/time/alarmtimer.c          20
 -rw-r--r--  kernel/trace/ftrace.c              4
 -rw-r--r--  kernel/trace/ring_buffer.c         4
 -rw-r--r--  kernel/trace/trace.c              20
 -rw-r--r--  kernel/trace/trace_clock.c         9
 -rw-r--r--  kernel/trace/trace_events.c        1
 -rw-r--r--  kernel/workqueue.c                 3

26 files changed, 364 insertions, 137 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index 35536d9c0964..76768ee812b2 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -220,9 +220,16 @@ config INLINE_WRITE_UNLOCK_IRQRESTORE
 
 endif
 
+config ARCH_SUPPORTS_ATOMIC_RMW
+        bool
+
 config MUTEX_SPIN_ON_OWNER
         def_bool y
-        depends on SMP && !DEBUG_MUTEXES
+        depends on SMP && !DEBUG_MUTEXES && ARCH_SUPPORTS_ATOMIC_RMW
+
+config RWSEM_SPIN_ON_OWNER
+        def_bool y
+        depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
 
 config ARCH_USE_QUEUE_RWLOCK
         bool
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 7868fc3c0bc5..70776aec2562 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1648,10 +1648,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                 int flags, const char *unused_dev_name,
                 void *data)
 {
+        struct super_block *pinned_sb = NULL;
+        struct cgroup_subsys *ss;
         struct cgroup_root *root;
         struct cgroup_sb_opts opts;
         struct dentry *dentry;
         int ret;
+        int i;
         bool new_sb;
 
         /*
@@ -1677,6 +1680,27 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                 goto out_unlock;
         }
 
+        /*
+         * Destruction of cgroup root is asynchronous, so subsystems may
+         * still be dying after the previous unmount. Let's drain the
+         * dying subsystems. We just need to ensure that the ones
+         * unmounted previously finish dying and don't care about new ones
+         * starting. Testing ref liveliness is good enough.
+         */
+        for_each_subsys(ss, i) {
+                if (!(opts.subsys_mask & (1 << i)) ||
+                    ss->root == &cgrp_dfl_root)
+                        continue;
+
+                if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
+                        mutex_unlock(&cgroup_mutex);
+                        msleep(10);
+                        ret = restart_syscall();
+                        goto out_free;
+                }
+                cgroup_put(&ss->root->cgrp);
+        }
+
         for_each_root(root) {
                 bool name_match = false;
 
@@ -1717,15 +1741,23 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                 }
 
                 /*
-                 * A root's lifetime is governed by its root cgroup.
-                 * tryget_live failure indicate that the root is being
-                 * destroyed. Wait for destruction to complete so that the
-                 * subsystems are free. We can use wait_queue for the wait
-                 * but this path is super cold. Let's just sleep for a bit
-                 * and retry.
+                 * We want to reuse @root whose lifetime is governed by its
+                 * ->cgrp. Let's check whether @root is alive and keep it
+                 * that way. As cgroup_kill_sb() can happen anytime, we
+                 * want to block it by pinning the sb so that @root doesn't
+                 * get killed before mount is complete.
+                 *
+                 * With the sb pinned, tryget_live can reliably indicate
+                 * whether @root can be reused. If it's being killed,
+                 * drain it. We can use wait_queue for the wait but this
+                 * path is super cold. Let's just sleep a bit and retry.
                  */
-                if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+                pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
+                if (IS_ERR(pinned_sb) ||
+                    !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
                         mutex_unlock(&cgroup_mutex);
+                        if (!IS_ERR_OR_NULL(pinned_sb))
+                                deactivate_super(pinned_sb);
                         msleep(10);
                         ret = restart_syscall();
                         goto out_free;
@@ -1770,6 +1802,16 @@ out_free:
                                  CGROUP_SUPER_MAGIC, &new_sb);
         if (IS_ERR(dentry) || !new_sb)
                 cgroup_put(&root->cgrp);
+
+        /*
+         * If @pinned_sb, we're reusing an existing root and holding an
+         * extra ref on its sb. Mount is complete. Put the extra ref.
+         */
+        if (pinned_sb) {
+                WARN_ON(new_sb);
+                deactivate_super(pinned_sb);
+        }
+
         return dentry;
 }
 
@@ -3328,7 +3370,7 @@ bool css_has_online_children(struct cgroup_subsys_state *css)
 
         rcu_read_lock();
         css_for_each_child(child, css) {
-                if (css->flags & CSS_ONLINE) {
+                if (child->flags & CSS_ONLINE) {
                         ret = true;
                         break;
                 }
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index f6b33c696224..116a4164720a 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1181,7 +1181,13 @@ done:
 
 int current_cpuset_is_being_rebound(void)
 {
-        return task_cs(current) == cpuset_being_rebound;
+        int ret;
+
+        rcu_read_lock();
+        ret = task_cs(current) == cpuset_being_rebound;
+        rcu_read_unlock();
+
+        return ret;
 }
 
 static int update_relax_domain_level(struct cpuset *cs, s64 val)
@@ -1617,7 +1623,17 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
          * resources, wait for the previously scheduled operations before
          * proceeding, so that we don't end up keep removing tasks added
          * after execution capability is restored.
+         *
+         * cpuset_hotplug_work calls back into cgroup core via
+         * cgroup_transfer_tasks() and waiting for it from a cgroupfs
+         * operation like this one can lead to a deadlock through kernfs
+         * active_ref protection. Let's break the protection. Losing the
+         * protection is okay as we check whether @cs is online after
+         * grabbing cpuset_mutex anyway. This only happens on the legacy
+         * hierarchies.
          */
+        css_get(&cs->css);
+        kernfs_break_active_protection(of->kn);
         flush_work(&cpuset_hotplug_work);
 
         mutex_lock(&cpuset_mutex);
@@ -1645,6 +1661,8 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
         free_trial_cpuset(trialcs);
 out_unlock:
         mutex_unlock(&cpuset_mutex);
+        kernfs_unbreak_active_protection(of->kn);
+        css_put(&cs->css);
         return retval ?: nbytes;
 }
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a33d9a2bcbd7..6b17ac1b0c2a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2320,7 +2320,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
         next_parent = rcu_dereference(next_ctx->parent_ctx);
 
         /* If neither context have a parent context; they cannot be clones. */
-        if (!parent && !next_parent)
+        if (!parent || !next_parent)
                 goto unlock;
 
         if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
@@ -7458,7 +7458,19 @@ __perf_event_exit_task(struct perf_event *child_event,
                        struct perf_event_context *child_ctx,
                        struct task_struct *child)
 {
-        perf_remove_from_context(child_event, true);
+        /*
+         * Do not destroy the 'original' grouping; because of the context
+         * switch optimization the original events could've ended up in a
+         * random child task.
+         *
+         * If we were to destroy the original group, all group related
+         * operations would cease to function properly after this random
+         * child dies.
+         *
+         * Do destroy all inherited groups, we don't care about those
+         * and being thorough is better.
+         */
+        perf_remove_from_context(child_event, !!child_event->parent);
 
         /*
          * It can happen that the parent exits first, and has events
@@ -7474,7 +7486,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 {
         struct perf_event *child_event, *next;
-        struct perf_event_context *child_ctx;
+        struct perf_event_context *child_ctx, *parent_ctx;
         unsigned long flags;
 
         if (likely(!child->perf_event_ctxp[ctxn])) {
@@ -7499,6 +7511,15 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         raw_spin_lock(&child_ctx->lock);
         task_ctx_sched_out(child_ctx);
         child->perf_event_ctxp[ctxn] = NULL;
+
+        /*
+         * In order to avoid freeing: child_ctx->parent_ctx->task
+         * under perf_event_context::lock, grab another reference.
+         */
+        parent_ctx = child_ctx->parent_ctx;
+        if (parent_ctx)
+                get_ctx(parent_ctx);
+
         /*
          * If this context is a clone; unclone it so it can't get
          * swapped to another process while we're removing all
@@ -7509,6 +7530,13 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
         raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
         /*
+         * Now that we no longer hold perf_event_context::lock, drop
+         * our extra child_ctx->parent_ctx reference.
+         */
+        if (parent_ctx)
+                put_ctx(parent_ctx);
+
+        /*
          * Report the task dead after unscheduling the events so that we
          * won't get any samples after PERF_RECORD_EXIT. We can however still
          * get a few PERF_RECORD_READ events.
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3214289df5a7..734e9a7d280b 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2037,19 +2037,23 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
 {
         unsigned long *iter;
         struct kprobe_blacklist_entry *ent;
-        unsigned long offset = 0, size = 0;
+        unsigned long entry, offset = 0, size = 0;
 
         for (iter = start; iter < end; iter++) {
-                if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) {
-                        pr_err("Failed to find blacklist %p\n", (void *)*iter);
+                entry = arch_deref_entry_point((void *)*iter);
+
+                if (!kernel_text_address(entry) ||
+                    !kallsyms_lookup_size_offset(entry, &size, &offset)) {
+                        pr_err("Failed to find blacklist at %p\n",
+                               (void *)entry);
                         continue;
                 }
 
                 ent = kmalloc(sizeof(*ent), GFP_KERNEL);
                 if (!ent)
                         return -ENOMEM;
-                ent->start_addr = *iter;
-                ent->end_addr = *iter + size;
+                ent->start_addr = entry;
+                ent->end_addr = entry + size;
                 INIT_LIST_HEAD(&ent->list);
                 list_add_tail(&ent->list, &kprobe_blacklist);
         }
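The blacklist change above depends on arch_deref_entry_point() to turn each _kprobe_blacklist entry into a real kernel text address before the kallsyms lookup; on most architectures the helper is expected to be a no-op, while powerpc64 uses it to look through the function descriptor. The helper itself is outside this hunk; a hedged sketch of the generic weak default, inferred from how it is used here:

    /* Assumed generic fallback (not shown in this diff): architectures
     * without function descriptors just return the address unchanged. */
    unsigned long __weak arch_deref_entry_point(void *entry)
    {
            return (unsigned long)entry;
    }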
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
index 838dc9e00669..be9ee1559fca 100644
--- a/kernel/locking/mcs_spinlock.c
+++ b/kernel/locking/mcs_spinlock.c
@@ -14,21 +14,47 @@
  * called from interrupt context and we have preemption disabled while
  * spinning.
  */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);
+
+/*
+ * We use the value 0 to represent "no CPU", thus the encoded value
+ * will be the CPU number incremented by 1.
+ */
+static inline int encode_cpu(int cpu_nr)
+{
+        return cpu_nr + 1;
+}
+
+static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
+{
+        int cpu_nr = encoded_cpu_val - 1;
+
+        return per_cpu_ptr(&osq_node, cpu_nr);
+}
 
 /*
  * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
  * Can return NULL in case we were the last queued and we updated @lock instead.
  */
-static inline struct optimistic_spin_queue *
-osq_wait_next(struct optimistic_spin_queue **lock,
-              struct optimistic_spin_queue *node,
-              struct optimistic_spin_queue *prev)
+static inline struct optimistic_spin_node *
+osq_wait_next(struct optimistic_spin_queue *lock,
+              struct optimistic_spin_node *node,
+              struct optimistic_spin_node *prev)
 {
-        struct optimistic_spin_queue *next = NULL;
+        struct optimistic_spin_node *next = NULL;
+        int curr = encode_cpu(smp_processor_id());
+        int old;
+
+        /*
+         * If there is a prev node in queue, then the 'old' value will be
+         * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if
+         * we're currently last in queue, then the queue will then become empty.
+         */
+        old = prev ? prev->cpu : OSQ_UNLOCKED_VAL;
 
         for (;;) {
-                if (*lock == node && cmpxchg(lock, node, prev) == node) {
+                if (atomic_read(&lock->tail) == curr &&
+                    atomic_cmpxchg(&lock->tail, curr, old) == curr) {
                         /*
                          * We were the last queued, we moved @lock back. @prev
                          * will now observe @lock and will complete its
@@ -59,18 +85,23 @@ osq_wait_next(struct optimistic_spin_queue **lock,
         return next;
 }
 
-bool osq_lock(struct optimistic_spin_queue **lock)
+bool osq_lock(struct optimistic_spin_queue *lock)
 {
-        struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
-        struct optimistic_spin_queue *prev, *next;
+        struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
+        struct optimistic_spin_node *prev, *next;
+        int curr = encode_cpu(smp_processor_id());
+        int old;
 
         node->locked = 0;
         node->next = NULL;
+        node->cpu = curr;
 
-        node->prev = prev = xchg(lock, node);
-        if (likely(prev == NULL))
+        old = atomic_xchg(&lock->tail, curr);
+        if (old == OSQ_UNLOCKED_VAL)
                 return true;
 
+        prev = decode_cpu(old);
+        node->prev = prev;
         ACCESS_ONCE(prev->next) = node;
 
         /*
@@ -149,20 +180,21 @@ unqueue:
         return false;
 }
 
-void osq_unlock(struct optimistic_spin_queue **lock)
+void osq_unlock(struct optimistic_spin_queue *lock)
 {
-        struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node);
-        struct optimistic_spin_queue *next;
+        struct optimistic_spin_node *node, *next;
+        int curr = encode_cpu(smp_processor_id());
 
         /*
          * Fast path for the uncontended case.
          */
-        if (likely(cmpxchg(lock, node, NULL) == node))
+        if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
                 return;
 
         /*
          * Second most likely case.
          */
+        node = this_cpu_ptr(&osq_node);
         next = xchg(&node->next, NULL);
         if (next) {
                 ACCESS_ONCE(next->locked) = 1;
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
index a2dbac4aca6b..74356dc0ce29 100644
--- a/kernel/locking/mcs_spinlock.h
+++ b/kernel/locking/mcs_spinlock.h
@@ -118,12 +118,13 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
  * mutex_lock()/rwsem_down_{read,write}() etc.
  */
 
-struct optimistic_spin_queue {
-        struct optimistic_spin_queue *next, *prev;
+struct optimistic_spin_node {
+        struct optimistic_spin_node *next, *prev;
         int locked; /* 1 if lock acquired */
+        int cpu; /* encoded CPU # value */
 };
 
-extern bool osq_lock(struct optimistic_spin_queue **lock);
-extern void osq_unlock(struct optimistic_spin_queue **lock);
+extern bool osq_lock(struct optimistic_spin_queue *lock);
+extern void osq_unlock(struct optimistic_spin_queue *lock);
 
 #endif /* __LINUX_MCS_SPINLOCK_H */
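With this split, struct optimistic_spin_node is the per-CPU queue element, while the field embedded in each mutex/rwsem shrinks to a struct optimistic_spin_queue holding only an atomic, encoded tail CPU number. That queue-head type, OSQ_UNLOCKED_VAL, and the osq_lock_init() helper used by mutex.c and rwsem-xadd.c below live in a header that is not part of this section; the following is only a sketch consistent with how they are used here, with details assumed:

    #define OSQ_UNLOCKED_VAL (0)

    /* Per-lock queue head: an encoded CPU number instead of a node pointer. */
    struct optimistic_spin_queue {
            /* CPU # + 1 of the tail node, or OSQ_UNLOCKED_VAL when empty. */
            atomic_t tail;
    };

    static inline void osq_lock_init(struct optimistic_spin_queue *lock)
    {
            atomic_set(&lock->tail, OSQ_UNLOCKED_VAL);
    }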
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index bc73d33c6760..acca2c1a3c5e 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -60,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key)
         INIT_LIST_HEAD(&lock->wait_list);
         mutex_clear_owner(lock);
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-        lock->osq = NULL;
+        osq_lock_init(&lock->osq);
 #endif
 
         debug_mutex_init(lock, name, key);
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
index 9be8a9144978..2c93571162cb 100644
--- a/kernel/locking/rwsem-spinlock.c
+++ b/kernel/locking/rwsem-spinlock.c
@@ -26,7 +26,7 @@ int rwsem_is_locked(struct rw_semaphore *sem)
         unsigned long flags;
 
         if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
-                ret = (sem->activity != 0);
+                ret = (sem->count != 0);
                 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
         }
         return ret;
@@ -46,7 +46,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
         debug_check_no_locks_freed((void *)sem, sizeof(*sem));
         lockdep_init_map(&sem->dep_map, name, key, 0);
 #endif
-        sem->activity = 0;
+        sem->count = 0;
         raw_spin_lock_init(&sem->wait_lock);
         INIT_LIST_HEAD(&sem->wait_list);
 }
@@ -95,7 +95,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
                 waiter = list_entry(next, struct rwsem_waiter, list);
         } while (waiter->type != RWSEM_WAITING_FOR_WRITE);
 
-        sem->activity += woken;
+        sem->count += woken;
 
 out:
         return sem;
@@ -126,9 +126,9 @@ void __sched __down_read(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
+        if (sem->count >= 0 && list_empty(&sem->wait_list)) {
                 /* granted */
-                sem->activity++;
+                sem->count++;
                 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
                 goto out;
         }
@@ -170,9 +170,9 @@ int __down_read_trylock(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
+        if (sem->count >= 0 && list_empty(&sem->wait_list)) {
                 /* granted */
-                sem->activity++;
+                sem->count++;
                 ret = 1;
         }
 
@@ -206,7 +206,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
          * itself into sleep and waiting for system woke it or someone
          * else in the head of the wait list up.
          */
-        if (sem->activity == 0)
+        if (sem->count == 0)
                 break;
         set_task_state(tsk, TASK_UNINTERRUPTIBLE);
         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -214,7 +214,7 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
                 raw_spin_lock_irqsave(&sem->wait_lock, flags);
         }
         /* got the lock */
-        sem->activity = -1;
+        sem->count = -1;
         list_del(&waiter.list);
 
         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -235,9 +235,9 @@ int __down_write_trylock(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (sem->activity == 0) {
+        if (sem->count == 0) {
                 /* got the lock */
-                sem->activity = -1;
+                sem->count = -1;
                 ret = 1;
         }
 
@@ -255,7 +255,7 @@ void __up_read(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        if (--sem->activity == 0 && !list_empty(&sem->wait_list))
+        if (--sem->count == 0 && !list_empty(&sem->wait_list))
                 sem = __rwsem_wake_one_writer(sem);
 
         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
@@ -270,7 +270,7 @@ void __up_write(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        sem->activity = 0;
+        sem->count = 0;
         if (!list_empty(&sem->wait_list))
                 sem = __rwsem_do_wake(sem, 1);
 
@@ -287,7 +287,7 @@ void __downgrade_write(struct rw_semaphore *sem)
 
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
 
-        sem->activity = 1;
+        sem->count = 1;
         if (!list_empty(&sem->wait_list))
                 sem = __rwsem_do_wake(sem, 0);
 
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index dacc32142fcc..a2391ac135c8 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -82,9 +82,9 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
         sem->count = RWSEM_UNLOCKED_VALUE;
         raw_spin_lock_init(&sem->wait_lock);
         INIT_LIST_HEAD(&sem->wait_list);
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
         sem->owner = NULL;
-        sem->osq = NULL;
+        osq_lock_init(&sem->osq);
 #endif
 }
 
@@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
         return false;
 }
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 /*
  * Try to acquire write lock before the writer has been put on wait queue.
  */
@@ -285,10 +285,10 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 {
         struct task_struct *owner;
-        bool on_cpu = true;
+        bool on_cpu = false;
 
         if (need_resched())
-                return 0;
+                return false;
 
         rcu_read_lock();
         owner = ACCESS_ONCE(sem->owner);
@@ -297,9 +297,9 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
         rcu_read_unlock();
 
         /*
-         * If sem->owner is not set, the rwsem owner may have
-         * just acquired it and not set the owner yet or the rwsem
-         * has been released.
+         * If sem->owner is not set, yet we have just recently entered the
+         * slowpath, then there is a possibility reader(s) may have the lock.
+         * To be safe, avoid spinning in these situations.
          */
         return on_cpu;
 }
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 42f806de49d4..e2d3bc7f03b4 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -12,7 +12,7 @@
 
 #include <linux/atomic.h>
 
-#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
 static inline void rwsem_set_owner(struct rw_semaphore *sem)
 {
         sem->owner = current;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 0ca8d83e2369..4ee194eb524b 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -186,6 +186,7 @@ void thaw_processes(void)
 
         printk("Restarting tasks ... ");
 
+        __usermodehelper_set_disable_depth(UMH_FREEZING);
         thaw_workqueues();
 
         read_lock(&tasklist_lock);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4dd8822f732a..ed35a4790afe 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -306,7 +306,7 @@ int suspend_devices_and_enter(suspend_state_t state)
                 error = suspend_ops->begin(state);
                 if (error)
                         goto Close;
-        } else if (state == PM_SUSPEND_FREEZE && freeze_ops->begin) {
+        } else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->begin) {
                 error = freeze_ops->begin();
                 if (error)
                         goto Close;
@@ -335,7 +335,7 @@ int suspend_devices_and_enter(suspend_state_t state)
 Close:
         if (need_suspend_ops(state) && suspend_ops->end)
                 suspend_ops->end();
-        else if (state == PM_SUSPEND_FREEZE && freeze_ops->end)
+        else if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->end)
                 freeze_ops->end();
 
         return error;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index f1ba77363fbb..625d0b0cd75a 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -206,6 +206,70 @@ void rcu_bh_qs(int cpu)
         rdp->passed_quiesce = 1;
 }
 
+static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
+
+static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
+        .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
+        .dynticks = ATOMIC_INIT(1),
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+        .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
+        .dynticks_idle = ATOMIC_INIT(1),
+#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+};
+
+/*
+ * Let the RCU core know that this CPU has gone through the scheduler,
+ * which is a quiescent state. This is called when the need for a
+ * quiescent state is urgent, so we burn an atomic operation and full
+ * memory barriers to let the RCU core know about it, regardless of what
+ * this CPU might (or might not) do in the near future.
+ *
+ * We inform the RCU core by emulating a zero-duration dyntick-idle
+ * period, which we in turn do by incrementing the ->dynticks counter
+ * by two.
+ */
+static void rcu_momentary_dyntick_idle(void)
+{
+        unsigned long flags;
+        struct rcu_data *rdp;
+        struct rcu_dynticks *rdtp;
+        int resched_mask;
+        struct rcu_state *rsp;
+
+        local_irq_save(flags);
+
+        /*
+         * Yes, we can lose flag-setting operations. This is OK, because
+         * the flag will be set again after some delay.
+         */
+        resched_mask = raw_cpu_read(rcu_sched_qs_mask);
+        raw_cpu_write(rcu_sched_qs_mask, 0);
+
+        /* Find the flavor that needs a quiescent state. */
+        for_each_rcu_flavor(rsp) {
+                rdp = raw_cpu_ptr(rsp->rda);
+                if (!(resched_mask & rsp->flavor_mask))
+                        continue;
+                smp_mb(); /* rcu_sched_qs_mask before cond_resched_completed. */
+                if (ACCESS_ONCE(rdp->mynode->completed) !=
+                    ACCESS_ONCE(rdp->cond_resched_completed))
+                        continue;
+
+                /*
+                 * Pretend to be momentarily idle for the quiescent state.
+                 * This allows the grace-period kthread to record the
+                 * quiescent state, with no need for this CPU to do anything
+                 * further.
+                 */
+                rdtp = this_cpu_ptr(&rcu_dynticks);
+                smp_mb__before_atomic(); /* Earlier stuff before QS. */
+                atomic_add(2, &rdtp->dynticks); /* QS. */
+                smp_mb__after_atomic(); /* Later stuff after QS. */
+                break;
+        }
+        local_irq_restore(flags);
+}
+
 /*
  * Note a context switch. This is a quiescent state for RCU-sched,
  * and requires special handling for preemptible RCU.
@@ -216,19 +280,12 @@ void rcu_note_context_switch(int cpu)
         trace_rcu_utilization(TPS("Start context switch"));
         rcu_sched_qs(cpu);
         rcu_preempt_note_context_switch(cpu);
+        if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
+                rcu_momentary_dyntick_idle();
         trace_rcu_utilization(TPS("End context switch"));
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
-static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
-        .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
-        .dynticks = ATOMIC_INIT(1),
-#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
-        .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
-        .dynticks_idle = ATOMIC_INIT(1),
-#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-};
-
 static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
 static long qhimark = 10000; /* If this many pending, ignore blimit. */
 static long qlowmark = 100; /* Once only this many pending, use blimit. */
@@ -243,6 +300,13 @@ static ulong jiffies_till_next_fqs = ULONG_MAX;
 module_param(jiffies_till_first_fqs, ulong, 0644);
 module_param(jiffies_till_next_fqs, ulong, 0644);
 
+/*
+ * How long the grace period must be before we start recruiting
+ * quiescent-state help from rcu_note_context_switch().
+ */
+static ulong jiffies_till_sched_qs = HZ / 20;
+module_param(jiffies_till_sched_qs, ulong, 0644);
+
 static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
                                   struct rcu_data *rdp);
 static void force_qs_rnp(struct rcu_state *rsp,
@@ -853,6 +917,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
                                     bool *isidle, unsigned long *maxj)
 {
         unsigned int curr;
+        int *rcrmp;
         unsigned int snap;
 
         curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
@@ -893,27 +958,43 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp,
         }
 
         /*
-         * There is a possibility that a CPU in adaptive-ticks state
-         * might run in the kernel with the scheduling-clock tick disabled
-         * for an extended time period. Invoke rcu_kick_nohz_cpu() to
-         * force the CPU to restart the scheduling-clock tick in this
-         * CPU is in this state.
-         */
-        rcu_kick_nohz_cpu(rdp->cpu);
-
-        /*
-         * Alternatively, the CPU might be running in the kernel
-         * for an extended period of time without a quiescent state.
-         * Attempt to force the CPU through the scheduler to gain the
-         * needed quiescent state, but only if the grace period has gone
-         * on for an uncommonly long time. If there are many stuck CPUs,
-         * we will beat on the first one until it gets unstuck, then move
-         * to the next. Only do this for the primary flavor of RCU.
+         * A CPU running for an extended time within the kernel can
+         * delay RCU grace periods. When the CPU is in NO_HZ_FULL mode,
+         * even context-switching back and forth between a pair of
+         * in-kernel CPU-bound tasks cannot advance grace periods.
+         * So if the grace period is old enough, make the CPU pay attention.
+         * Note that the unsynchronized assignments to the per-CPU
+         * rcu_sched_qs_mask variable are safe. Yes, setting of
+         * bits can be lost, but they will be set again on the next
+         * force-quiescent-state pass. So lost bit sets do not result
+         * in incorrect behavior, merely in a grace period lasting
+         * a few jiffies longer than it might otherwise. Because
+         * there are at most four threads involved, and because the
+         * updates are only once every few jiffies, the probability of
+         * lossage (and thus of slight grace-period extension) is
+         * quite low.
+         *
+         * Note that if the jiffies_till_sched_qs boot/sysfs parameter
+         * is set too high, we override with half of the RCU CPU stall
+         * warning delay.
          */
-        if (rdp->rsp == rcu_state_p &&
+        rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu);
+        if (ULONG_CMP_GE(jiffies,
+                         rdp->rsp->gp_start + jiffies_till_sched_qs) ||
             ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
-                rdp->rsp->jiffies_resched += 5;
-                resched_cpu(rdp->cpu);
+                if (!(ACCESS_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) {
+                        ACCESS_ONCE(rdp->cond_resched_completed) =
+                                ACCESS_ONCE(rdp->mynode->completed);
+                        smp_mb(); /* ->cond_resched_completed before *rcrmp. */
+                        ACCESS_ONCE(*rcrmp) =
+                                ACCESS_ONCE(*rcrmp) + rdp->rsp->flavor_mask;
+                        resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
+                        rdp->rsp->jiffies_resched += 5; /* Enable beating. */
+                } else if (ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) {
+                        /* Time to beat on that CPU again! */
+                        resched_cpu(rdp->cpu); /* Force CPU into scheduler. */
+                        rdp->rsp->jiffies_resched += 5; /* Re-enable beating. */
+                }
         }
 
         return 0;
@@ -3491,6 +3572,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
                 "rcu_node_fqs_1",
                 "rcu_node_fqs_2",
                 "rcu_node_fqs_3" }; /* Match MAX_RCU_LVLS */
+        static u8 fl_mask = 0x1;
         int cpustride = 1;
         int i;
         int j;
@@ -3509,6 +3591,8 @@ static void __init rcu_init_one(struct rcu_state *rsp,
         for (i = 1; i < rcu_num_lvls; i++)
                 rsp->level[i] = rsp->level[i - 1] + rsp->levelcnt[i - 1];
         rcu_init_levelspread(rsp);
+        rsp->flavor_mask = fl_mask;
+        fl_mask <<= 1;
 
         /* Initialize the elements themselves, starting from the leaves. */
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index bf2c1e669691..0f69a79c5b7d 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -307,6 +307,9 @@ struct rcu_data {
         /* 4) reasons this CPU needed to be kicked by force_quiescent_state */
         unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */
         unsigned long offline_fqs; /* Kicked due to being offline. */
+        unsigned long cond_resched_completed;
+                                    /* Grace period that needs help */
+                                    /* from cond_resched(). */
 
         /* 5) __rcu_pending() statistics. */
         unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
@@ -392,6 +395,7 @@ struct rcu_state {
         struct rcu_node *level[RCU_NUM_LVLS]; /* Hierarchy levels. */
         u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
         u8 levelspread[RCU_NUM_LVLS]; /* kids/node in each level. */
+        u8 flavor_mask; /* bit in flavor mask. */
         struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */
         void (*call)(struct rcu_head *head, /* call_rcu() flavor. */
                      void (*func)(struct rcu_head *head));
@@ -563,7 +567,7 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
 static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
-static void rcu_kick_nohz_cpu(int cpu);
+static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
 static bool init_nocb_callback_list(struct rcu_data *rdp);
 static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
 static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index cbc2c45265e2..02ac0fb186b8 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2404,7 +2404,7 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
  * if an adaptive-ticks CPU is failing to respond to the current grace
  * period and has not be idle from an RCU perspective, kick it.
  */
-static void rcu_kick_nohz_cpu(int cpu)
+static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
 {
 #ifdef CONFIG_NO_HZ_FULL
         if (tick_nohz_full_cpu(cpu))
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index a2aeb4df0f60..bc7883570530 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -200,12 +200,12 @@ void wait_rcu_gp(call_rcu_func_t crf)
 EXPORT_SYMBOL_GPL(wait_rcu_gp);
 
 #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-static inline void debug_init_rcu_head(struct rcu_head *head)
+void init_rcu_head(struct rcu_head *head)
 {
         debug_object_init(head, &rcuhead_debug_descr);
 }
 
-static inline void debug_rcu_head_free(struct rcu_head *head)
+void destroy_rcu_head(struct rcu_head *head)
 {
         debug_object_free(head, &rcuhead_debug_descr);
 }
@@ -350,21 +350,3 @@ static int __init check_cpu_stall_init(void)
 early_initcall(check_cpu_stall_init);
 
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
-
-/*
- * Hooks for cond_resched() and friends to avoid RCU CPU stall warnings.
- */
-
-DEFINE_PER_CPU(int, rcu_cond_resched_count);
-
-/*
- * Report a set of RCU quiescent states, for use by cond_resched()
- * and friends. Out of line due to being called infrequently.
- */
-void rcu_resched(void)
-{
-        preempt_disable();
-        __this_cpu_write(rcu_cond_resched_count, 0);
-        rcu_note_context_switch(smp_processor_id());
-        preempt_enable();
-}
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3bdf01b494fe..bc1638b33449 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4147,7 +4147,6 @@ static void __cond_resched(void)
 
 int __sched _cond_resched(void)
 {
-        rcu_cond_resched();
         if (should_resched()) {
                 __cond_resched();
                 return 1;
@@ -4166,18 +4165,15 @@ EXPORT_SYMBOL(_cond_resched);
  */
 int __cond_resched_lock(spinlock_t *lock)
 {
-        bool need_rcu_resched = rcu_should_resched();
         int resched = should_resched();
         int ret = 0;
 
         lockdep_assert_held(lock);
 
-        if (spin_needbreak(lock) || resched || need_rcu_resched) {
+        if (spin_needbreak(lock) || resched) {
                 spin_unlock(lock);
                 if (resched)
                         __cond_resched();
-                else if (unlikely(need_rcu_resched))
-                        rcu_resched();
                 else
                         cpu_relax();
                 ret = 1;
@@ -4191,7 +4187,6 @@ int __sched __cond_resched_softirq(void)
 {
         BUG_ON(!in_softirq());
 
-        rcu_cond_resched(); /* BH disabled OK, just recording QSes. */
         if (should_resched()) {
                 local_bh_enable();
                 __cond_resched();
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 695f9773bb60..627b3c34b821 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -608,7 +608,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 
         avg_atom = p->se.sum_exec_runtime;
         if (nr_switches)
-                do_div(avg_atom, nr_switches);
+                avg_atom = div64_ul(avg_atom, nr_switches);
         else
                 avg_atom = -1LL;
 
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 88c9c65a430d..fe75444ae7ec 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -585,9 +585,14 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
                            struct itimerspec *new_setting,
                            struct itimerspec *old_setting)
 {
+        ktime_t exp;
+
         if (!rtcdev)
                 return -ENOTSUPP;
 
+        if (flags & ~TIMER_ABSTIME)
+                return -EINVAL;
+
         if (old_setting)
                 alarm_timer_get(timr, old_setting);
 
@@ -597,8 +602,16 @@ static int alarm_timer_set(struct k_itimer *timr, int flags,
 
         /* start the timer */
         timr->it.alarm.interval = timespec_to_ktime(new_setting->it_interval);
-        alarm_start(&timr->it.alarm.alarmtimer,
-                    timespec_to_ktime(new_setting->it_value));
+        exp = timespec_to_ktime(new_setting->it_value);
+        /* Convert (if necessary) to absolute time */
+        if (flags != TIMER_ABSTIME) {
+                ktime_t now;
+
+                now = alarm_bases[timr->it.alarm.alarmtimer.type].gettime();
+                exp = ktime_add(now, exp);
+        }
+
+        alarm_start(&timr->it.alarm.alarmtimer, exp);
         return 0;
 }
 
@@ -730,6 +743,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
         if (!alarmtimer_get_rtcdev())
                 return -ENOTSUPP;
 
+        if (flags & ~TIMER_ABSTIME)
+                return -EINVAL;
+
         if (!capable(CAP_WAKE_ALARM))
                 return -EPERM;
 
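Seen from user space, the two new flags checks mean an alarm timer now rejects flag bits other than TIMER_ABSTIME with EINVAL, and a relative it_value is converted against the alarm base instead of being silently treated as an absolute expiry. A minimal user-space sketch of the changed behaviour (not part of the patch; assumes CAP_WAKE_ALARM and linking with -lrt):

    #include <signal.h>
    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            timer_t t;
            struct sigevent sev = { .sigev_notify = SIGEV_NONE };
            struct itimerspec its = { .it_value = { .tv_sec = 5 } };

            if (timer_create(CLOCK_REALTIME_ALARM, &sev, &t))
                    return 1;

            /* Relative 5s expiry: accepted, made absolute inside the kernel. */
            if (timer_settime(t, 0, &its, NULL))
                    perror("relative timer_settime");

            /* Garbage flag bits: now fail with EINVAL. */
            if (timer_settime(t, 0x1234, &its, NULL))
                    perror("bogus-flags timer_settime");

            timer_delete(t);
            return 0;
    }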
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5b372e3ed675..ac9d1dad630b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
| @@ -265,12 +265,12 @@ static void update_ftrace_function(void) | |||
| 265 | func = ftrace_ops_list_func; | 265 | func = ftrace_ops_list_func; |
| 266 | } | 266 | } |
| 267 | 267 | ||
| 268 | update_function_graph_func(); | ||
| 269 | |||
| 268 | /* If there's no change, then do nothing more here */ | 270 | /* If there's no change, then do nothing more here */ |
| 269 | if (ftrace_trace_function == func) | 271 | if (ftrace_trace_function == func) |
| 270 | return; | 272 | return; |
| 271 | 273 | ||
| 272 | update_function_graph_func(); | ||
| 273 | |||
| 274 | /* | 274 | /* |
| 275 | * If we are using the list function, it doesn't care | 275 | * If we are using the list function, it doesn't care |
| 276 | * about the function_trace_ops. | 276 | * about the function_trace_ops. |
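The reordering above calls update_function_graph_func() before the "no change" fast path returns, so the graph entry function is refreshed even when ftrace_trace_function itself stays the same. A toy illustration of that ordering hazard, with hypothetical names standing in for the ftrace state:

	#include <stdio.h>

	static int primary_func;	/* analogue of ftrace_trace_function */
	static int graph_func;		/* analogue of the graph entry function */
	static int list_mode;		/* analogue of "must go through the list func" */

	static void update_graph_func(void)
	{
		/* Derived state that can change even when primary_func does not. */
		graph_func = list_mode ? 2 : 1;
	}

	static void update_function(int new_primary)
	{
		/* Do the dependent update *before* the fast-path return; otherwise
		 * a change in list_mode alone leaves graph_func stale. */
		update_graph_func();

		if (primary_func == new_primary)	/* no change: done */
			return;

		primary_func = new_primary;
	}

	int main(void)
	{
		update_function(1);
		list_mode = 1;
		update_function(1);	/* primary unchanged, graph still refreshed */
		printf("graph_func = %d\n", graph_func);	/* prints 2 */
		return 0;
	}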
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 7c56c3d06943..ff7027199a9a 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
| @@ -616,10 +616,6 @@ int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, | |||
| 616 | struct ring_buffer_per_cpu *cpu_buffer; | 616 | struct ring_buffer_per_cpu *cpu_buffer; |
| 617 | struct rb_irq_work *work; | 617 | struct rb_irq_work *work; |
| 618 | 618 | ||
| 619 | if ((cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) || | ||
| 620 | (cpu != RING_BUFFER_ALL_CPUS && !ring_buffer_empty_cpu(buffer, cpu))) | ||
| 621 | return POLLIN | POLLRDNORM; | ||
| 622 | |||
| 623 | if (cpu == RING_BUFFER_ALL_CPUS) | 619 | if (cpu == RING_BUFFER_ALL_CPUS) |
| 624 | work = &buffer->irq_work; | 620 | work = &buffer->irq_work; |
| 625 | else { | 621 | else { |
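The deleted early return reported POLLIN before poll_wait() had added the caller to the ring buffer's wait queue; a poller (epoll in particular) that drains the initially available data can then wait forever, because it was never registered for future wakeups. A userspace toy model of that pattern, with invented names:

	#include <stdbool.h>
	#include <stdio.h>

	struct waitqueue { bool registered; };

	static struct waitqueue wq;
	static int items_in_buffer;

	static void poll_wait_toy(struct waitqueue *q) { q->registered = true; }

	/* Buggy shape: the early return skips wait-queue registration. */
	static int poll_buggy(void)
	{
		if (items_in_buffer)
			return 1;		/* "POLLIN", but never registered */
		poll_wait_toy(&wq);
		return 0;
	}

	/* Fixed shape: always register first, then report readiness. */
	static int poll_fixed(void)
	{
		poll_wait_toy(&wq);
		return items_in_buffer ? 1 : 0;
	}

	int main(void)
	{
		items_in_buffer = 1;

		wq.registered = false;
		poll_buggy();
		printf("buggy: registered for wakeups? %s\n", wq.registered ? "yes" : "no");

		wq.registered = false;
		poll_fixed();
		printf("fixed: registered for wakeups? %s\n", wq.registered ? "yes" : "no");
		return 0;
	}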
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f243444a3772..291397e66669 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -466,6 +466,12 @@ int __trace_puts(unsigned long ip, const char *str, int size) | |||
| 466 | struct print_entry *entry; | 466 | struct print_entry *entry; |
| 467 | unsigned long irq_flags; | 467 | unsigned long irq_flags; |
| 468 | int alloc; | 468 | int alloc; |
| 469 | int pc; | ||
| 470 | |||
| 471 | if (!(trace_flags & TRACE_ITER_PRINTK)) | ||
| 472 | return 0; | ||
| 473 | |||
| 474 | pc = preempt_count(); | ||
| 469 | 475 | ||
| 470 | if (unlikely(tracing_selftest_running || tracing_disabled)) | 476 | if (unlikely(tracing_selftest_running || tracing_disabled)) |
| 471 | return 0; | 477 | return 0; |
| @@ -475,7 +481,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) | |||
| 475 | local_save_flags(irq_flags); | 481 | local_save_flags(irq_flags); |
| 476 | buffer = global_trace.trace_buffer.buffer; | 482 | buffer = global_trace.trace_buffer.buffer; |
| 477 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, | 483 | event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, |
| 478 | irq_flags, preempt_count()); | 484 | irq_flags, pc); |
| 479 | if (!event) | 485 | if (!event) |
| 480 | return 0; | 486 | return 0; |
| 481 | 487 | ||
| @@ -492,6 +498,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) | |||
| 492 | entry->buf[size] = '\0'; | 498 | entry->buf[size] = '\0'; |
| 493 | 499 | ||
| 494 | __buffer_unlock_commit(buffer, event); | 500 | __buffer_unlock_commit(buffer, event); |
| 501 | ftrace_trace_stack(buffer, irq_flags, 4, pc); | ||
| 495 | 502 | ||
| 496 | return size; | 503 | return size; |
| 497 | } | 504 | } |
| @@ -509,6 +516,12 @@ int __trace_bputs(unsigned long ip, const char *str) | |||
| 509 | struct bputs_entry *entry; | 516 | struct bputs_entry *entry; |
| 510 | unsigned long irq_flags; | 517 | unsigned long irq_flags; |
| 511 | int size = sizeof(struct bputs_entry); | 518 | int size = sizeof(struct bputs_entry); |
| 519 | int pc; | ||
| 520 | |||
| 521 | if (!(trace_flags & TRACE_ITER_PRINTK)) | ||
| 522 | return 0; | ||
| 523 | |||
| 524 | pc = preempt_count(); | ||
| 512 | 525 | ||
| 513 | if (unlikely(tracing_selftest_running || tracing_disabled)) | 526 | if (unlikely(tracing_selftest_running || tracing_disabled)) |
| 514 | return 0; | 527 | return 0; |
| @@ -516,7 +529,7 @@ int __trace_bputs(unsigned long ip, const char *str) | |||
| 516 | local_save_flags(irq_flags); | 529 | local_save_flags(irq_flags); |
| 517 | buffer = global_trace.trace_buffer.buffer; | 530 | buffer = global_trace.trace_buffer.buffer; |
| 518 | event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, | 531 | event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, |
| 519 | irq_flags, preempt_count()); | 532 | irq_flags, pc); |
| 520 | if (!event) | 533 | if (!event) |
| 521 | return 0; | 534 | return 0; |
| 522 | 535 | ||
| @@ -525,6 +538,7 @@ int __trace_bputs(unsigned long ip, const char *str) | |||
| 525 | entry->str = str; | 538 | entry->str = str; |
| 526 | 539 | ||
| 527 | __buffer_unlock_commit(buffer, event); | 540 | __buffer_unlock_commit(buffer, event); |
| 541 | ftrace_trace_stack(buffer, irq_flags, 4, pc); | ||
| 528 | 542 | ||
| 529 | return 1; | 543 | return 1; |
| 530 | } | 544 | } |
| @@ -809,7 +823,7 @@ static struct { | |||
| 809 | { trace_clock_local, "local", 1 }, | 823 | { trace_clock_local, "local", 1 }, |
| 810 | { trace_clock_global, "global", 1 }, | 824 | { trace_clock_global, "global", 1 }, |
| 811 | { trace_clock_counter, "counter", 0 }, | 825 | { trace_clock_counter, "counter", 0 }, |
| 812 | { trace_clock_jiffies, "uptime", 1 }, | 826 | { trace_clock_jiffies, "uptime", 0 }, |
| 813 | { trace_clock, "perf", 1 }, | 827 | { trace_clock, "perf", 1 }, |
| 814 | ARCH_TRACE_CLOCKS | 828 | ARCH_TRACE_CLOCKS |
| 815 | }; | 829 | }; |
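The additions to __trace_puts() and __trace_bputs() bail out when the TRACE_ITER_PRINTK option is off and sample preempt_count() once into pc, so the reserved event and the ftrace_trace_stack() record that now follows it carry the same context value. A rough userspace sketch of that shape, with made-up flag and helper names:

	#include <stdio.h>

	#define ITER_PRINTK	0x01	/* hypothetical option bit */

	static unsigned int trace_flags = ITER_PRINTK;
	static unsigned int context_counter;	/* stand-in for preempt_count() */

	static void record_event(unsigned int pc) { printf("event  pc=%u\n", pc); }
	static void record_stack(unsigned int pc) { printf("stack  pc=%u\n", pc); }

	static int trace_puts_toy(const char *str)
	{
		unsigned int pc;

		if (!(trace_flags & ITER_PRINTK))
			return 0;		/* option disabled: emit nothing */

		pc = context_counter;		/* sample once, reuse for both records */
		record_event(pc);
		record_stack(pc);
		(void)str;
		return 1;
	}

	int main(void)
	{
		trace_puts_toy("hello");
		trace_flags = 0;
		return trace_puts_toy("ignored");	/* returns 0, no output */
	}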
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 26dc348332b7..57b67b1f24d1 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c | |||
| @@ -59,13 +59,14 @@ u64 notrace trace_clock(void) | |||
| 59 | 59 | ||
| 60 | /* | 60 | /* |
| 61 | * trace_jiffy_clock(): Simply use jiffies as a clock counter. | 61 | * trace_jiffy_clock(): Simply use jiffies as a clock counter. |
| 62 | * Note that this use of jiffies_64 is not completely safe on | ||
| 63 | * 32-bit systems. But the window is tiny, and the effect if | ||
| 64 | * we are affected is that we will have an obviously bogus | ||
| 65 | * timestamp on a trace event - i.e. not life threatening. | ||
| 62 | */ | 66 | */ |
| 63 | u64 notrace trace_clock_jiffies(void) | 67 | u64 notrace trace_clock_jiffies(void) |
| 64 | { | 68 | { |
| 65 | u64 jiffy = jiffies - INITIAL_JIFFIES; | 69 | return jiffies_64_to_clock_t(jiffies_64 - INITIAL_JIFFIES); |
| 66 | |||
| 67 | /* Return nsecs */ | ||
| 68 | return (u64)jiffies_to_usecs(jiffy) * 1000ULL; | ||
| 69 | } | 70 | } |
| 70 | 71 | ||
| 71 | /* | 72 | /* |
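trace_clock_jiffies() now reads jiffies_64 and returns clock_t units via jiffies_64_to_clock_t() instead of scaling a 32-bit jiffies value up to nanoseconds, which is also why the "uptime" entry in the clock table above drops its in_ns flag to 0. The motivation for the wider counter is wraparound: on 32-bit systems plain jiffies wraps (roughly every 49.7 days at HZ=1000), making the uptime clock jump backwards. A sketch of the arithmetic in plain userspace C:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		const uint64_t hz = 1000;		/* ticks per second */
		uint64_t wrap_seconds = ((uint64_t)UINT32_MAX + 1) / hz;

		printf("32-bit wrap after %llu seconds (~%.1f days)\n",
		       (unsigned long long)wrap_seconds, wrap_seconds / 86400.0);

		/* A tick count taken just before and just after the wrap: */
		uint32_t before = UINT32_MAX - 5;
		uint32_t after  = before + 10;		/* wraps to a small value */
		printf("32-bit ticks: before=%u after=%u (went backwards)\n",
		       before, after);

		uint64_t before64 = (uint64_t)UINT32_MAX - 5;
		uint64_t after64  = before64 + 10;	/* keeps increasing */
		printf("64-bit ticks: before=%llu after=%llu\n",
		       (unsigned long long)before64, (unsigned long long)after64);
		return 0;
	}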
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index f99e0b3bca8c..2de53628689f 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c | |||
| @@ -470,6 +470,7 @@ static void remove_event_file_dir(struct ftrace_event_file *file) | |||
| 470 | 470 | ||
| 471 | list_del(&file->list); | 471 | list_del(&file->list); |
| 472 | remove_subsystem(file->system); | 472 | remove_subsystem(file->system); |
| 473 | free_event_filter(file->filter); | ||
| 473 | kmem_cache_free(file_cachep, file); | 474 | kmem_cache_free(file_cachep, file); |
| 474 | } | 475 | } |
| 475 | 476 | ||
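The added free_event_filter() call releases the per-file filter that would otherwise leak when the event file is torn down. A minimal userspace analogue of the ownership pattern, with made-up types:

	#include <stdlib.h>
	#include <string.h>

	struct filter { char *expr; };

	struct event_file {
		struct filter *filter;	/* separately owned allocation */
	};

	static struct filter *alloc_filter(const char *expr)
	{
		struct filter *f = malloc(sizeof(*f));

		if (f)
			f->expr = strdup(expr);
		return f;
	}

	static void free_filter(struct filter *f)
	{
		if (!f)
			return;
		free(f->expr);
		free(f);
	}

	static void remove_event_file(struct event_file *file)
	{
		free_filter(file->filter);	/* without this, the filter leaks */
		free(file);
	}

	int main(void)
	{
		struct event_file *file = malloc(sizeof(*file));

		if (!file)
			return 1;
		file->filter = alloc_filter("pid == 42");
		remove_event_file(file);
		return 0;
	}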
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6203d2900877..35974ac69600 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -3284,6 +3284,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq) | |||
| 3284 | } | 3284 | } |
| 3285 | } | 3285 | } |
| 3286 | 3286 | ||
| 3287 | dev_set_uevent_suppress(&wq_dev->dev, false); | ||
| 3287 | kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); | 3288 | kobject_uevent(&wq_dev->dev.kobj, KOBJ_ADD); |
| 3288 | return 0; | 3289 | return 0; |
| 3289 | } | 3290 | } |
| @@ -4879,7 +4880,7 @@ static void __init wq_numa_init(void) | |||
| 4879 | BUG_ON(!tbl); | 4880 | BUG_ON(!tbl); |
| 4880 | 4881 | ||
| 4881 | for_each_node(node) | 4882 | for_each_node(node) |
| 4882 | BUG_ON(!alloc_cpumask_var_node(&tbl[node], GFP_KERNEL, | 4883 | BUG_ON(!zalloc_cpumask_var_node(&tbl[node], GFP_KERNEL, |
| 4883 | node_online(node) ? node : NUMA_NO_NODE)); | 4884 | node_online(node) ? node : NUMA_NO_NODE)); |
| 4884 | 4885 | ||
| 4885 | for_each_possible_cpu(cpu) { | 4886 | for_each_possible_cpu(cpu) { |
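Two workqueue fixes above: uevent suppression is cleared on the new wq device before KOBJ_ADD is emitted, so the add uevent is not silently dropped, and the per-node cpumasks are now allocated zeroed (zalloc_cpumask_var_node), since the loop that fills them only sets bits and would otherwise inherit whatever garbage the allocation contained. A userspace bitmap sketch of the second point, with invented helpers:

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	#define NBITS 64

	static void set_bit_in(unsigned long *mask, int bit)
	{
		mask[bit / (8 * sizeof(*mask))] |= 1UL << (bit % (8 * sizeof(*mask)));
	}

	static int count_bits(const unsigned long *mask, int nbits)
	{
		int i, n = 0;

		for (i = 0; i < nbits; i++)
			if (mask[i / (8 * sizeof(*mask))] >> (i % (8 * sizeof(*mask))) & 1)
				n++;
		return n;
	}

	int main(void)
	{
		size_t words = NBITS / (8 * sizeof(unsigned long));

		/* malloc()-style: contents indeterminate, like alloc_cpumask_var_node() */
		unsigned long *dirty = malloc(words * sizeof(*dirty));
		/* calloc()-style: zero-filled, like zalloc_cpumask_var_node() */
		unsigned long *clean = calloc(words, sizeof(*clean));

		if (!dirty || !clean)
			return 1;
		memset(dirty, 0xff, words * sizeof(*dirty));	/* simulate stale heap data */

		set_bit_in(dirty, 3);	/* "CPU 3 is on this node" */
		set_bit_in(clean, 3);

		printf("dirty mask reports %d CPUs, clean mask reports %d\n",
		       count_bits(dirty, NBITS), count_bits(clean, NBITS));
		free(dirty);
		free(clean);
		return 0;
	}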
