Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile             |   1
-rw-r--r--  kernel/cgroup.c             | 110
-rw-r--r--  kernel/cpu_pm.c             |   2
-rw-r--r--  kernel/fork.c               |   4
-rw-r--r--  kernel/irq/chip.c           |  33
-rw-r--r--  kernel/irq/handle.c         |   4
-rw-r--r--  kernel/irq/internals.h      |   2
-rw-r--r--  kernel/irq/irqdesc.c        |  20
-rw-r--r--  kernel/irq/irqdomain.c      |   1
-rw-r--r--  kernel/irq/manage.c         |  12
-rw-r--r--  kernel/irq/proc.c           |   2
-rw-r--r--  kernel/irq/resend.c         |   2
-rw-r--r--  kernel/locking/qspinlock.c  |   2
-rw-r--r--  kernel/membarrier.c         |  66
-rw-r--r--  kernel/sched/core.c         |  51
-rw-r--r--  kernel/sched/wait.c         |   7
-rw-r--r--  kernel/sys_ni.c             |   3
-rw-r--r--  kernel/time/clockevents.c   |  42
-rw-r--r--  kernel/time/tick-common.c   |   1
-rw-r--r--  kernel/time/tick-sched.c    |  15
-rw-r--r--  kernel/time/timekeeping.c   |   2
-rw-r--r--  kernel/time/timer_list.c    |  54
22 files changed, 277 insertions, 159 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index d4988410b410..53abf008ecb3 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
 obj-$(CONFIG_JUMP_LABEL) += jump_label.o
 obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o
 obj-$(CONFIG_TORTURE_TEST) += torture.o
+obj-$(CONFIG_MEMBARRIER) += membarrier.o
 
 obj-$(CONFIG_HAS_IOMEM) += memremap.o
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2cf0f79f1fc9..2c9eae6ad970 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -46,7 +46,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/rwsem.h>
-#include <linux/percpu-rwsem.h>
 #include <linux/string.h>
 #include <linux/sort.h>
 #include <linux/kmod.h>
@@ -104,8 +103,6 @@ static DEFINE_SPINLOCK(cgroup_idr_lock);
  */
 static DEFINE_SPINLOCK(release_agent_path_lock);
 
-struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
-
 #define cgroup_assert_mutex_or_rcu_locked() \
 	RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
 			 !lockdep_is_held(&cgroup_mutex), \
@@ -874,6 +871,48 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	return cset;
 }
 
+void cgroup_threadgroup_change_begin(struct task_struct *tsk)
+{
+	down_read(&tsk->signal->group_rwsem);
+}
+
+void cgroup_threadgroup_change_end(struct task_struct *tsk)
+{
+	up_read(&tsk->signal->group_rwsem);
+}
+
+/**
+ * threadgroup_lock - lock threadgroup
+ * @tsk: member task of the threadgroup to lock
+ *
+ * Lock the threadgroup @tsk belongs to. No new task is allowed to enter
+ * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
+ * change ->group_leader/pid. This is useful for cases where the threadgroup
+ * needs to stay stable across blockable operations.
+ *
+ * fork and exit explicitly call threadgroup_change_{begin|end}() for
+ * synchronization. While held, no new task will be added to threadgroup
+ * and no existing live task will have its PF_EXITING set.
+ *
+ * de_thread() does threadgroup_change_{begin|end}() when a non-leader
+ * sub-thread becomes a new leader.
+ */
+static void threadgroup_lock(struct task_struct *tsk)
+{
+	down_write(&tsk->signal->group_rwsem);
+}
+
+/**
+ * threadgroup_unlock - unlock threadgroup
+ * @tsk: member task of the threadgroup to unlock
+ *
+ * Reverse threadgroup_lock().
+ */
+static inline void threadgroup_unlock(struct task_struct *tsk)
+{
+	up_write(&tsk->signal->group_rwsem);
+}
+
 static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
 {
 	struct cgroup *root_cgrp = kf_root->kn->priv;
@@ -2074,9 +2113,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
 	lockdep_assert_held(&css_set_rwsem);
 
 	/*
-	 * We are synchronized through cgroup_threadgroup_rwsem against
-	 * PF_EXITING setting such that we can't race against cgroup_exit()
-	 * changing the css_set to init_css_set and dropping the old one.
+	 * We are synchronized through threadgroup_lock() against PF_EXITING
+	 * setting such that we can't race against cgroup_exit() changing the
+	 * css_set to init_css_set and dropping the old one.
 	 */
 	WARN_ON_ONCE(tsk->flags & PF_EXITING);
 	old_cset = task_css_set(tsk);
@@ -2133,11 +2172,10 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
  * @src_cset and add it to @preloaded_csets, which should later be cleaned
  * up by cgroup_migrate_finish().
  *
- * This function may be called without holding cgroup_threadgroup_rwsem
- * even if the target is a process. Threads may be created and destroyed
- * but as long as cgroup_mutex is not dropped, no new css_set can be put
- * into play and the preloaded css_sets are guaranteed to cover all
- * migrations.
+ * This function may be called without holding threadgroup_lock even if the
+ * target is a process. Threads may be created and destroyed but as long
+ * as cgroup_mutex is not dropped, no new css_set can be put into play and
+ * the preloaded css_sets are guaranteed to cover all migrations.
  */
 static void cgroup_migrate_add_src(struct css_set *src_cset,
 				   struct cgroup *dst_cgrp,
@@ -2240,7 +2278,7 @@ err:
  * @threadgroup: whether @leader points to the whole process or a single task
  *
  * Migrate a process or task denoted by @leader to @cgrp. If migrating a
- * process, the caller must be holding cgroup_threadgroup_rwsem. The
+ * process, the caller must be holding threadgroup_lock of @leader. The
  * caller is also responsible for invoking cgroup_migrate_add_src() and
  * cgroup_migrate_prepare_dst() on the targets before invoking this
  * function and following up with cgroup_migrate_finish().
@@ -2368,7 +2406,7 @@ out_release_tset:
  * @leader: the task or the leader of the threadgroup to be attached
  * @threadgroup: attach the whole threadgroup?
  *
- * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
+ * Call holding cgroup_mutex and threadgroup_lock of @leader.
  */
 static int cgroup_attach_task(struct cgroup *dst_cgrp,
 			      struct task_struct *leader, bool threadgroup)
@@ -2460,13 +2498,14 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	if (!cgrp)
 		return -ENODEV;
 
-	percpu_down_write(&cgroup_threadgroup_rwsem);
+retry_find_task:
 	rcu_read_lock();
 	if (pid) {
 		tsk = find_task_by_vpid(pid);
 		if (!tsk) {
+			rcu_read_unlock();
 			ret = -ESRCH;
-			goto out_unlock_rcu;
+			goto out_unlock_cgroup;
 		}
 	} else {
 		tsk = current;
@@ -2482,23 +2521,37 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	 */
 	if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
 		ret = -EINVAL;
-		goto out_unlock_rcu;
+		rcu_read_unlock();
+		goto out_unlock_cgroup;
 	}
 
 	get_task_struct(tsk);
 	rcu_read_unlock();
 
+	threadgroup_lock(tsk);
+	if (threadgroup) {
+		if (!thread_group_leader(tsk)) {
+			/*
+			 * a race with de_thread from another thread's exec()
+			 * may strip us of our leadership, if this happens,
+			 * there is no choice but to throw this task away and
+			 * try again; this is
+			 * "double-double-toil-and-trouble-check locking".
+			 */
+			threadgroup_unlock(tsk);
+			put_task_struct(tsk);
+			goto retry_find_task;
+		}
+	}
+
 	ret = cgroup_procs_write_permission(tsk, cgrp, of);
 	if (!ret)
 		ret = cgroup_attach_task(cgrp, tsk, threadgroup);
 
-	put_task_struct(tsk);
-	goto out_unlock_threadgroup;
+	threadgroup_unlock(tsk);
 
-out_unlock_rcu:
-	rcu_read_unlock();
-out_unlock_threadgroup:
-	percpu_up_write(&cgroup_threadgroup_rwsem);
+	put_task_struct(tsk);
+out_unlock_cgroup:
 	cgroup_kn_unlock(of->kn);
 	return ret ?: nbytes;
 }
@@ -2643,8 +2696,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	percpu_down_write(&cgroup_threadgroup_rwsem);
-
 	/* look up all csses currently attached to @cgrp's subtree */
 	down_read(&css_set_rwsem);
 	css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
@@ -2700,8 +2751,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 			goto out_finish;
 		last_task = task;
 
+		threadgroup_lock(task);
+		/* raced against de_thread() from another thread? */
+		if (!thread_group_leader(task)) {
+			threadgroup_unlock(task);
+			put_task_struct(task);
+			continue;
+		}
+
 		ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
 
+		threadgroup_unlock(task);
 		put_task_struct(task);
 
 		if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
@@ -2711,7 +2771,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 
 out_finish:
 	cgroup_migrate_finish(&preloaded_csets);
-	percpu_up_write(&cgroup_threadgroup_rwsem);
 	return ret;
 }
 
@@ -5024,7 +5083,6 @@ int __init cgroup_init(void)
 	unsigned long key;
 	int ssid, err;
 
-	BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
 
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index 9656a3c36503..009cc9a17d95 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -180,7 +180,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter);
  * low power state that may have caused some blocks in the same power domain
  * to reset.
  *
- * Must be called after cpu_pm_exit has been called on all cpus in the power
+ * Must be called after cpu_cluster_pm_enter has been called for the power
  * domain, and before cpu_pm_exit has been called on any cpu in the power
  * domain. Notified drivers can include VFP co-processor, interrupt controller
  * and its PM extensions, local CPU timers context save/restore which
diff --git a/kernel/fork.c b/kernel/fork.c
index 7d5f0f118a63..2845623fb582 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1149,6 +1149,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	tty_audit_fork(sig);
 	sched_autogroup_fork(sig);
 
+#ifdef CONFIG_CGROUPS
+	init_rwsem(&sig->group_rwsem);
+#endif
+
 	sig->oom_score_adj = current->signal->oom_score_adj;
 	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6e40a9539763..e28169dd1c36 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -83,7 +83,7 @@ int irq_set_handler_data(unsigned int irq, void *data)
 
 	if (!desc)
 		return -EINVAL;
-	desc->irq_data.handler_data = data;
+	desc->irq_common_data.handler_data = data;
 	irq_put_desc_unlock(desc, flags);
 	return 0;
 }
@@ -105,7 +105,7 @@ int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
 
 	if (!desc)
 		return -EINVAL;
-	desc->irq_data.msi_desc = entry;
+	desc->irq_common_data.msi_desc = entry;
 	if (entry && !irq_offset)
 		entry->irq = irq_base;
 	irq_put_desc_unlock(desc, flags);
@@ -372,7 +372,6 @@ static bool irq_may_run(struct irq_desc *desc)
 
 /**
  * handle_simple_irq - Simple and software-decoded IRQs.
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Simple interrupts are either sent from a demultiplexing interrupt
@@ -382,8 +381,7 @@ static bool irq_may_run(struct irq_desc *desc)
  * Note: The caller is expected to handle the ack, clear, mask and
  * unmask issues if necessary.
  */
-void
-handle_simple_irq(unsigned int irq, struct irq_desc *desc)
+void handle_simple_irq(struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
 
@@ -425,7 +423,6 @@ static void cond_unmask_irq(struct irq_desc *desc)
 
 /**
  * handle_level_irq - Level type irq handler
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Level type interrupts are active as long as the hardware line has
@@ -433,8 +430,7 @@ static void cond_unmask_irq(struct irq_desc *desc)
  * it after the associated handler has acknowledged the device, so the
  * interrupt line is back to inactive.
  */
-void
-handle_level_irq(unsigned int irq, struct irq_desc *desc)
+void handle_level_irq(struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
 	mask_ack_irq(desc);
@@ -496,7 +492,6 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
 
 /**
  * handle_fasteoi_irq - irq handler for transparent controllers
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Only a single callback will be issued to the chip: an ->eoi()
@@ -504,8 +499,7 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
  * for modern forms of interrupt handlers, which handle the flow
  * details in hardware, transparently.
  */
-void
-handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
+void handle_fasteoi_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = desc->irq_data.chip;
 
@@ -546,7 +540,6 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq);
 
 /**
  * handle_edge_irq - edge type IRQ handler
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Interrupt occures on the falling and/or rising edge of a hardware
@@ -560,8 +553,7 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq);
  * the handler was running. If all pending interrupts are handled, the
  * loop is left.
  */
-void
-handle_edge_irq(unsigned int irq, struct irq_desc *desc)
+void handle_edge_irq(struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
 
@@ -618,13 +610,12 @@ EXPORT_SYMBOL(handle_edge_irq);
 #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER
 /**
  * handle_edge_eoi_irq - edge eoi type IRQ handler
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Similar as the above handle_edge_irq, but using eoi and w/o the
  * mask/unmask logic.
  */
-void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc)
+void handle_edge_eoi_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 
@@ -665,13 +656,11 @@ out_eoi:
 
 /**
  * handle_percpu_irq - Per CPU local irq handler
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Per CPU interrupts on SMP machines without locking requirements
  */
-void
-handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
+void handle_percpu_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 
@@ -688,7 +677,6 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 
 /**
  * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Per CPU interrupts on SMP machines without locking requirements. Same as
@@ -698,11 +686,12 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
  * contain the real device id for the cpu on which this handler is
  * called
  */
-void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc)
+void handle_percpu_devid_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct irqaction *action = desc->action;
 	void *dev_id = raw_cpu_ptr(action->percpu_dev_id);
+	unsigned int irq = irq_desc_get_irq(desc);
 	irqreturn_t res;
 
 	kstat_incr_irqs_this_cpu(desc);
@@ -796,7 +785,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
 		return;
 
 	__irq_do_set_handler(desc, handle, 1, NULL);
-	desc->irq_data.handler_data = data;
+	desc->irq_common_data.handler_data = data;
 
 	irq_put_desc_busunlock(desc, flags);
 }
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index b6eeea8a80c5..de41a68fc038 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -27,8 +27,10 @@
  *
  * Handles spurious and unhandled IRQ's. It also prints a debugmessage.
  */
-void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
+void handle_bad_irq(struct irq_desc *desc)
 {
+	unsigned int irq = irq_desc_get_irq(desc);
+
 	print_irq_desc(irq, desc);
 	kstat_incr_irqs_this_cpu(desc);
 	ack_bad_irq(irq);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index eee4b385cffb..5ef0c2dbe930 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -194,7 +194,7 @@ static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
 
 static inline int irq_desc_get_node(struct irq_desc *desc)
 {
-	return irq_data_get_node(&desc->irq_data);
+	return irq_common_data_get_node(&desc->irq_common_data);
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 0a2a4b697bcb..239e2ae2c947 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -38,12 +38,13 @@ static void __init init_irq_default_affinity(void)
 #ifdef CONFIG_SMP
 static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
 {
-	if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node))
+	if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity,
+				     gfp, node))
 		return -ENOMEM;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
-		free_cpumask_var(desc->irq_data.affinity);
+		free_cpumask_var(desc->irq_common_data.affinity);
 		return -ENOMEM;
 	}
 #endif
@@ -52,11 +53,13 @@ static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
 
 static void desc_smp_init(struct irq_desc *desc, int node)
 {
-	desc->irq_data.node = node;
-	cpumask_copy(desc->irq_data.affinity, irq_default_affinity);
+	cpumask_copy(desc->irq_common_data.affinity, irq_default_affinity);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_clear(desc->pending_mask);
 #endif
+#ifdef CONFIG_NUMA
+	desc->irq_common_data.node = node;
+#endif
 }
 
 #else
@@ -70,12 +73,13 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
 {
 	int cpu;
 
+	desc->irq_common_data.handler_data = NULL;
+	desc->irq_common_data.msi_desc = NULL;
+
 	desc->irq_data.common = &desc->irq_common_data;
 	desc->irq_data.irq = irq;
 	desc->irq_data.chip = &no_irq_chip;
 	desc->irq_data.chip_data = NULL;
-	desc->irq_data.handler_data = NULL;
-	desc->irq_data.msi_desc = NULL;
 	irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
 	irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
 	desc->handle_irq = handle_bad_irq;
@@ -121,7 +125,7 @@ static void free_masks(struct irq_desc *desc)
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	free_cpumask_var(desc->pending_mask);
 #endif
-	free_cpumask_var(desc->irq_data.affinity);
+	free_cpumask_var(desc->irq_common_data.affinity);
 }
 #else
 static inline void free_masks(struct irq_desc *desc) { }
@@ -343,7 +347,7 @@ int generic_handle_irq(unsigned int irq)
 
 	if (!desc)
 		return -EINVAL;
-	generic_handle_irq_desc(irq, desc);
+	generic_handle_irq_desc(desc);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(generic_handle_irq);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 79baaf8a7813..dc9d27c0c158 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -844,7 +844,6 @@ static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain,
 		child->parent_data = irq_data;
 		irq_data->irq = child->irq;
 		irq_data->common = child->common;
-		irq_data->node = child->node;
 		irq_data->domain = domain;
 	}
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ad1b064f94fe..f9a59f6cabd2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -192,7 +192,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	switch (ret) {
 	case IRQ_SET_MASK_OK:
 	case IRQ_SET_MASK_OK_DONE:
-		cpumask_copy(data->affinity, mask);
+		cpumask_copy(desc->irq_common_data.affinity, mask);
 	case IRQ_SET_MASK_OK_NOCOPY:
 		irq_set_thread_affinity(desc);
 		ret = 0;
@@ -304,7 +304,7 @@ static void irq_affinity_notify(struct work_struct *work)
 	if (irq_move_pending(&desc->irq_data))
 		irq_get_pending(cpumask, desc);
 	else
-		cpumask_copy(cpumask, desc->irq_data.affinity);
+		cpumask_copy(cpumask, desc->irq_common_data.affinity);
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	notify->notify(notify, cpumask);
@@ -375,9 +375,9 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask)
 	 * one of the targets is online.
 	 */
 	if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) {
-		if (cpumask_intersects(desc->irq_data.affinity,
+		if (cpumask_intersects(desc->irq_common_data.affinity,
 				       cpu_online_mask))
-			set = desc->irq_data.affinity;
+			set = desc->irq_common_data.affinity;
 		else
 			irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
 	}
@@ -829,8 +829,8 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
 	 * This code is triggered unconditionally. Check the affinity
 	 * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out.
 	 */
-	if (desc->irq_data.affinity)
-		cpumask_copy(mask, desc->irq_data.affinity);
+	if (desc->irq_common_data.affinity)
+		cpumask_copy(mask, desc->irq_common_data.affinity);
 	else
 		valid = false;
 	raw_spin_unlock_irq(&desc->lock);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0e97c142ce40..e3a8c9577ba6 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -39,7 +39,7 @@ static struct proc_dir_entry *root_irq_dir;
 static int show_irq_affinity(int type, struct seq_file *m, void *v)
 {
 	struct irq_desc *desc = irq_to_desc((long)m->private);
-	const struct cpumask *mask = desc->irq_data.affinity;
+	const struct cpumask *mask = desc->irq_common_data.affinity;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (irqd_is_setaffinity_pending(&desc->irq_data))
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index dd95f44f99b2..b86886beee4f 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -38,7 +38,7 @@ static void resend_irqs(unsigned long arg)
 		clear_bit(irq, irqs_resend);
 		desc = irq_to_desc(irq);
 		local_irq_disable();
-		desc->handle_irq(irq, desc);
+		desc->handle_irq(desc);
 		local_irq_enable();
 	}
 }
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 337c8818541d..87e9ce6a63c5 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -289,7 +289,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	if (pv_enabled())
 		goto queue;
 
-	if (virt_queued_spin_lock(lock))
+	if (virt_spin_lock(lock))
 		return;
 
 	/*
diff --git a/kernel/membarrier.c b/kernel/membarrier.c
new file mode 100644
index 000000000000..536c727a56e9
--- /dev/null
+++ b/kernel/membarrier.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * membarrier system call
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/syscalls.h>
+#include <linux/membarrier.h>
+
+/*
+ * Bitmask made from a "or" of all commands within enum membarrier_cmd,
+ * except MEMBARRIER_CMD_QUERY.
+ */
+#define MEMBARRIER_CMD_BITMASK (MEMBARRIER_CMD_SHARED)
+
+/**
+ * sys_membarrier - issue memory barriers on a set of threads
+ * @cmd:   Takes command values defined in enum membarrier_cmd.
+ * @flags: Currently needs to be 0. For future extensions.
+ *
+ * If this system call is not implemented, -ENOSYS is returned. If the
+ * command specified does not exist, or if the command argument is invalid,
+ * this system call returns -EINVAL. For a given command, with flags argument
+ * set to 0, this system call is guaranteed to always return the same value
+ * until reboot.
+ *
+ * All memory accesses performed in program order from each targeted thread
+ * is guaranteed to be ordered with respect to sys_membarrier(). If we use
+ * the semantic "barrier()" to represent a compiler barrier forcing memory
+ * accesses to be performed in program order across the barrier, and
+ * smp_mb() to represent explicit memory barriers forcing full memory
+ * ordering across the barrier, we have the following ordering table for
+ * each pair of barrier(), sys_membarrier() and smp_mb():
+ *
+ * The pair ordering is detailed as (O: ordered, X: not ordered):
+ *
+ *                        barrier()   smp_mb()   sys_membarrier()
+ *        barrier()          X           X              O
+ *        smp_mb()           X           O              O
+ *        sys_membarrier()   O           O              O
+ */
+SYSCALL_DEFINE2(membarrier, int, cmd, int, flags)
+{
+	if (unlikely(flags))
+		return -EINVAL;
+	switch (cmd) {
+	case MEMBARRIER_CMD_QUERY:
+		return MEMBARRIER_CMD_BITMASK;
+	case MEMBARRIER_CMD_SHARED:
+		if (num_online_cpus() > 1)
+			synchronize_sched();
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
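For context only (not part of the patch above): userspace reaches the new kernel/membarrier.c through the raw syscall interface. A minimal sketch, assuming kernel headers that export __NR_membarrier and defining the two command values locally because include/uapi/linux/membarrier.h lies outside this kernel/-only diffstat:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

#define MEMBARRIER_CMD_QUERY	0		/* mirrors enum membarrier_cmd (assumed values) */
#define MEMBARRIER_CMD_SHARED	(1 << 0)

int main(void)
{
	/* QUERY returns the bitmask of supported commands, or -1 with ENOSYS. */
	long mask = syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);

	if (mask < 0) {
		perror("membarrier");
		return 1;
	}
	if (mask & MEMBARRIER_CMD_SHARED) {
		/* Acts like smp_mb() on every running thread, pairing with a
		 * plain compiler barrier() on their side (see the ordering
		 * table in the kernel-doc above). */
		syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0);
	}
	return 0;
}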
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3595403921bd..2f9c92884817 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -621,18 +621,21 @@ int get_nohz_timer_target(void)
 	int i, cpu = smp_processor_id();
 	struct sched_domain *sd;
 
-	if (!idle_cpu(cpu))
+	if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
 		return cpu;
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		for_each_cpu(i, sched_domain_span(sd)) {
-			if (!idle_cpu(i)) {
+			if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) {
 				cpu = i;
 				goto unlock;
 			}
 		}
 	}
+
+	if (!is_housekeeping_cpu(cpu))
+		cpu = housekeeping_any_cpu();
 unlock:
 	rcu_read_unlock();
 	return cpu;
@@ -2666,13 +2669,20 @@ unsigned long nr_running(void)
 
 /*
  * Check if only the current task is running on the cpu.
+ *
+ * Caution: this function does not check that the caller has disabled
+ * preemption, thus the result might have a time-of-check-to-time-of-use
+ * race. The caller is responsible to use it correctly, for example:
+ *
+ * - from a non-preemptable section (of course)
+ *
+ * - from a thread that is bound to a single CPU
+ *
+ * - in a loop with very short iterations (e.g. a polling loop)
  */
 bool single_task_running(void)
 {
-	if (cpu_rq(smp_processor_id())->nr_running == 1)
-		return true;
-	else
-		return false;
+	return raw_rq()->nr_running == 1;
 }
 EXPORT_SYMBOL(single_task_running);
 
@@ -5178,24 +5188,47 @@ static void migrate_tasks(struct rq *dead_rq)
 			break;
 
 		/*
-		 * Ensure rq->lock covers the entire task selection
-		 * until the migration.
+		 * pick_next_task assumes pinned rq->lock.
 		 */
 		lockdep_pin_lock(&rq->lock);
 		next = pick_next_task(rq, &fake_task);
 		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
 
+		/*
+		 * Rules for changing task_struct::cpus_allowed are holding
+		 * both pi_lock and rq->lock, such that holding either
+		 * stabilizes the mask.
+		 *
+		 * Drop rq->lock is not quite as disastrous as it usually is
+		 * because !cpu_active at this point, which means load-balance
+		 * will not interfere. Also, stop-machine.
+		 */
+		lockdep_unpin_lock(&rq->lock);
+		raw_spin_unlock(&rq->lock);
+		raw_spin_lock(&next->pi_lock);
+		raw_spin_lock(&rq->lock);
+
+		/*
+		 * Since we're inside stop-machine, _nothing_ should have
+		 * changed the task, WARN if weird stuff happened, because in
+		 * that case the above rq->lock drop is a fail too.
+		 */
+		if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+			raw_spin_unlock(&next->pi_lock);
+			continue;
+		}
+
 		/* Find suitable destination for @next, with force if needed. */
 		dest_cpu = select_fallback_rq(dead_rq->cpu, next);
 
-		lockdep_unpin_lock(&rq->lock);
 		rq = __migrate_task(rq, next, dest_cpu);
 		if (rq != dead_rq) {
 			raw_spin_unlock(&rq->lock);
 			rq = dead_rq;
 			raw_spin_lock(&rq->lock);
 		}
+		raw_spin_unlock(&next->pi_lock);
 	}
 
 	rq->stop = stop;
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 272d9322bc5d..052e02672d12 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -106,10 +106,9 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked);
 
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr,
-			  void *key)
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
 {
-	__wake_up_common(q, mode, nr, 0, key);
+	__wake_up_common(q, mode, 1, 0, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
@@ -284,7 +283,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
 	if (!list_empty(&wait->task_list))
 		list_del_init(&wait->task_list);
 	else if (waitqueue_active(q))
-		__wake_up_locked_key(q, mode, 1, key);
+		__wake_up_locked_key(q, mode, key);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(abort_exclusive_wait);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 03c3875d9958..a02decf15583 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -245,3 +245,6 @@ cond_syscall(sys_bpf);
 
 /* execveat */
 cond_syscall(sys_execveat);
+
+/* membarrier */
+cond_syscall(sys_membarrier);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 50eb107f1198..a9b76a40319e 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -97,20 +97,6 @@ EXPORT_SYMBOL_GPL(clockevent_delta2ns);
 static int __clockevents_switch_state(struct clock_event_device *dev,
 				      enum clock_event_state state)
 {
-	/* Transition with legacy set_mode() callback */
-	if (dev->set_mode) {
-		/* Legacy callback doesn't support new modes */
-		if (state > CLOCK_EVT_STATE_ONESHOT)
-			return -ENOSYS;
-		/*
-		 * 'clock_event_state' and 'clock_event_mode' have 1-to-1
-		 * mapping until *_ONESHOT, and so a simple cast will work.
-		 */
-		dev->set_mode((enum clock_event_mode)state, dev);
-		dev->mode = (enum clock_event_mode)state;
-		return 0;
-	}
-
 	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
 		return 0;
 
@@ -204,12 +190,8 @@ int clockevents_tick_resume(struct clock_event_device *dev)
 {
 	int ret = 0;
 
-	if (dev->set_mode) {
-		dev->set_mode(CLOCK_EVT_MODE_RESUME, dev);
-		dev->mode = CLOCK_EVT_MODE_RESUME;
-	} else if (dev->tick_resume) {
+	if (dev->tick_resume)
 		ret = dev->tick_resume(dev);
-	}
 
 	return ret;
 }
@@ -460,26 +442,6 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
 }
 EXPORT_SYMBOL_GPL(clockevents_unbind_device);
 
-/* Sanity check of state transition callbacks */
-static int clockevents_sanity_check(struct clock_event_device *dev)
-{
-	/* Legacy set_mode() callback */
-	if (dev->set_mode) {
-		/* We shouldn't be supporting new modes now */
-		WARN_ON(dev->set_state_periodic || dev->set_state_oneshot ||
-			dev->set_state_shutdown || dev->tick_resume ||
-			dev->set_state_oneshot_stopped);
-
-		BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
-		return 0;
-	}
-
-	if (dev->features & CLOCK_EVT_FEAT_DUMMY)
-		return 0;
-
-	return 0;
-}
-
 /**
  * clockevents_register_device - register a clock event device
  * @dev: device to register
@@ -488,8 +450,6 @@ void clockevents_register_device(struct clock_event_device *dev)
 {
 	unsigned long flags;
 
-	BUG_ON(clockevents_sanity_check(dev));
-
 	/* Initialize state to DETACHED */
 	clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
 
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index d11c55b6ab7d..4fcd99e12aa0 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -398,7 +398,6 @@ void tick_shutdown(unsigned int cpu)
 	 * the set mode function!
 	 */
 	clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
-	dev->mode = CLOCK_EVT_MODE_UNUSED;
 	clockevents_exchange_device(dev, NULL);
 	dev->event_handler = clockevents_handle_noop;
 	td->evtdev = NULL;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3319e16f31e5..7c7ec4515983 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -290,16 +290,17 @@ static int __init tick_nohz_full_setup(char *str)
 __setup("nohz_full=", tick_nohz_full_setup);
 
 static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
 				       unsigned long action,
 				       void *hcpu)
 {
 	unsigned int cpu = (unsigned long)hcpu;
 
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_DOWN_PREPARE:
 		/*
-		 * If we handle the timekeeping duty for full dynticks CPUs,
-		 * we can't safely shutdown that CPU.
+		 * The boot CPU handles housekeeping duty (unbound timers,
+		 * workqueues, timekeeping, ...) on behalf of full dynticks
+		 * CPUs. It must remain online when nohz full is enabled.
 		 */
 		if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
 			return NOTIFY_BAD;
@@ -370,6 +371,12 @@ void __init tick_nohz_init(void)
 	cpu_notifier(tick_nohz_cpu_down_callback, 0);
 	pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
 		cpumask_pr_args(tick_nohz_full_mask));
+
+	/*
+	 * We need at least one CPU to handle housekeeping work such
+	 * as timekeeping, unbound timers, workqueues, ...
+	 */
+	WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
 }
 #endif
 
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f6ee2e6b6f5d..3739ac6aa473 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
 	negative = (tick_error < 0);
 
 	/* Sort out the magnitude of the correction */
-	tick_error = abs(tick_error);
+	tick_error = abs64(tick_error);
 	for (adj = 0; tick_error > interval; adj++)
 		tick_error >>= 1;
 
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 129c96033e46..f75e35b60149 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -225,7 +225,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 		   (unsigned long long) dev->min_delta_ns);
 	SEQ_printf(m, " mult: %u\n", dev->mult);
 	SEQ_printf(m, " shift: %u\n", dev->shift);
-	SEQ_printf(m, " mode: %d\n", dev->mode);
+	SEQ_printf(m, " mode: %d\n", clockevent_get_state(dev));
 	SEQ_printf(m, " next_event: %Ld nsecs\n",
 		   (unsigned long long) ktime_to_ns(dev->next_event));
 
@@ -233,40 +233,34 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
 	print_name_offset(m, dev->set_next_event);
 	SEQ_printf(m, "\n");
 
-	if (dev->set_mode) {
-		SEQ_printf(m, " set_mode: ");
-		print_name_offset(m, dev->set_mode);
+	if (dev->set_state_shutdown) {
+		SEQ_printf(m, " shutdown: ");
+		print_name_offset(m, dev->set_state_shutdown);
 		SEQ_printf(m, "\n");
-	} else {
-		if (dev->set_state_shutdown) {
-			SEQ_printf(m, " shutdown: ");
-			print_name_offset(m, dev->set_state_shutdown);
-			SEQ_printf(m, "\n");
-		}
+	}
 
 	if (dev->set_state_periodic) {
 		SEQ_printf(m, " periodic: ");
 		print_name_offset(m, dev->set_state_periodic);
 		SEQ_printf(m, "\n");
 	}
 
 	if (dev->set_state_oneshot) {
 		SEQ_printf(m, " oneshot: ");
 		print_name_offset(m, dev->set_state_oneshot);
 		SEQ_printf(m, "\n");
 	}
 
 	if (dev->set_state_oneshot_stopped) {
 		SEQ_printf(m, " oneshot stopped: ");
 		print_name_offset(m, dev->set_state_oneshot_stopped);
 		SEQ_printf(m, "\n");
 	}
 
 	if (dev->tick_resume) {
 		SEQ_printf(m, " resume: ");
 		print_name_offset(m, dev->tick_resume);
 		SEQ_printf(m, "\n");
-	}
 	}
 
 	SEQ_printf(m, " event_handler: ");