Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile            |   1
-rw-r--r--  kernel/cgroup.c            | 110
-rw-r--r--  kernel/cpu_pm.c            |   2
-rw-r--r--  kernel/fork.c              |   4
-rw-r--r--  kernel/irq/chip.c          |  33
-rw-r--r--  kernel/irq/handle.c        |   4
-rw-r--r--  kernel/irq/internals.h     |   2
-rw-r--r--  kernel/irq/irqdesc.c       |  20
-rw-r--r--  kernel/irq/irqdomain.c     |   1
-rw-r--r--  kernel/irq/manage.c        |  12
-rw-r--r--  kernel/irq/proc.c          |   2
-rw-r--r--  kernel/irq/resend.c        |   2
-rw-r--r--  kernel/locking/qspinlock.c |   2
-rw-r--r--  kernel/membarrier.c        |  66
-rw-r--r--  kernel/sched/core.c        |  51
-rw-r--r--  kernel/sched/wait.c        |   7
-rw-r--r--  kernel/sys_ni.c            |   3
-rw-r--r--  kernel/time/clockevents.c  |  42
-rw-r--r--  kernel/time/tick-common.c  |   1
-rw-r--r--  kernel/time/tick-sched.c   |  15
-rw-r--r--  kernel/time/timekeeping.c  |   2
-rw-r--r--  kernel/time/timer_list.c   |  54
22 files changed, 277 insertions(+), 159 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index d4988410b410..53abf008ecb3 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | |||
100 | obj-$(CONFIG_JUMP_LABEL) += jump_label.o | 100 | obj-$(CONFIG_JUMP_LABEL) += jump_label.o |
101 | obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o | 101 | obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o |
102 | obj-$(CONFIG_TORTURE_TEST) += torture.o | 102 | obj-$(CONFIG_TORTURE_TEST) += torture.o |
103 | obj-$(CONFIG_MEMBARRIER) += membarrier.o | ||
103 | 104 | ||
104 | obj-$(CONFIG_HAS_IOMEM) += memremap.o | 105 | obj-$(CONFIG_HAS_IOMEM) += memremap.o |
105 | 106 | ||
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2cf0f79f1fc9..2c9eae6ad970 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -46,7 +46,6 @@ | |||
46 | #include <linux/slab.h> | 46 | #include <linux/slab.h> |
47 | #include <linux/spinlock.h> | 47 | #include <linux/spinlock.h> |
48 | #include <linux/rwsem.h> | 48 | #include <linux/rwsem.h> |
49 | #include <linux/percpu-rwsem.h> | ||
50 | #include <linux/string.h> | 49 | #include <linux/string.h> |
51 | #include <linux/sort.h> | 50 | #include <linux/sort.h> |
52 | #include <linux/kmod.h> | 51 | #include <linux/kmod.h> |
@@ -104,8 +103,6 @@ static DEFINE_SPINLOCK(cgroup_idr_lock); | |||
104 | */ | 103 | */ |
105 | static DEFINE_SPINLOCK(release_agent_path_lock); | 104 | static DEFINE_SPINLOCK(release_agent_path_lock); |
106 | 105 | ||
107 | struct percpu_rw_semaphore cgroup_threadgroup_rwsem; | ||
108 | |||
109 | #define cgroup_assert_mutex_or_rcu_locked() \ | 106 | #define cgroup_assert_mutex_or_rcu_locked() \ |
110 | RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | 107 | RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ |
111 | !lockdep_is_held(&cgroup_mutex), \ | 108 | !lockdep_is_held(&cgroup_mutex), \ |
@@ -874,6 +871,48 @@ static struct css_set *find_css_set(struct css_set *old_cset, | |||
874 | return cset; | 871 | return cset; |
875 | } | 872 | } |
876 | 873 | ||
874 | void cgroup_threadgroup_change_begin(struct task_struct *tsk) | ||
875 | { | ||
876 | down_read(&tsk->signal->group_rwsem); | ||
877 | } | ||
878 | |||
879 | void cgroup_threadgroup_change_end(struct task_struct *tsk) | ||
880 | { | ||
881 | up_read(&tsk->signal->group_rwsem); | ||
882 | } | ||
883 | |||
884 | /** | ||
885 | * threadgroup_lock - lock threadgroup | ||
886 | * @tsk: member task of the threadgroup to lock | ||
887 | * | ||
888 | * Lock the threadgroup @tsk belongs to. No new task is allowed to enter | ||
889 | * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or | ||
890 | * change ->group_leader/pid. This is useful for cases where the threadgroup | ||
891 | * needs to stay stable across blockable operations. | ||
892 | * | ||
893 | * fork and exit explicitly call threadgroup_change_{begin|end}() for | ||
894 | * synchronization. While held, no new task will be added to threadgroup | ||
895 | * and no existing live task will have its PF_EXITING set. | ||
896 | * | ||
897 | * de_thread() does threadgroup_change_{begin|end}() when a non-leader | ||
898 | * sub-thread becomes a new leader. | ||
899 | */ | ||
900 | static void threadgroup_lock(struct task_struct *tsk) | ||
901 | { | ||
902 | down_write(&tsk->signal->group_rwsem); | ||
903 | } | ||
904 | |||
905 | /** | ||
906 | * threadgroup_unlock - unlock threadgroup | ||
907 | * @tsk: member task of the threadgroup to unlock | ||
908 | * | ||
909 | * Reverse threadgroup_lock(). | ||
910 | */ | ||
911 | static inline void threadgroup_unlock(struct task_struct *tsk) | ||
912 | { | ||
913 | up_write(&tsk->signal->group_rwsem); | ||
914 | } | ||
915 | |||
877 | static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) | 916 | static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) |
878 | { | 917 | { |
879 | struct cgroup *root_cgrp = kf_root->kn->priv; | 918 | struct cgroup *root_cgrp = kf_root->kn->priv; |
@@ -2074,9 +2113,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, | |||
2074 | lockdep_assert_held(&css_set_rwsem); | 2113 | lockdep_assert_held(&css_set_rwsem); |
2075 | 2114 | ||
2076 | /* | 2115 | /* |
2077 | * We are synchronized through cgroup_threadgroup_rwsem against | 2116 | * We are synchronized through threadgroup_lock() against PF_EXITING |
2078 | * PF_EXITING setting such that we can't race against cgroup_exit() | 2117 | * setting such that we can't race against cgroup_exit() changing the |
2079 | * changing the css_set to init_css_set and dropping the old one. | 2118 | * css_set to init_css_set and dropping the old one. |
2080 | */ | 2119 | */ |
2081 | WARN_ON_ONCE(tsk->flags & PF_EXITING); | 2120 | WARN_ON_ONCE(tsk->flags & PF_EXITING); |
2082 | old_cset = task_css_set(tsk); | 2121 | old_cset = task_css_set(tsk); |
@@ -2133,11 +2172,10 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) | |||
2133 | * @src_cset and add it to @preloaded_csets, which should later be cleaned | 2172 | * @src_cset and add it to @preloaded_csets, which should later be cleaned |
2134 | * up by cgroup_migrate_finish(). | 2173 | * up by cgroup_migrate_finish(). |
2135 | * | 2174 | * |
2136 | * This function may be called without holding cgroup_threadgroup_rwsem | 2175 | * This function may be called without holding threadgroup_lock even if the |
2137 | * even if the target is a process. Threads may be created and destroyed | 2176 | * target is a process. Threads may be created and destroyed but as long |
2138 | * but as long as cgroup_mutex is not dropped, no new css_set can be put | 2177 | * as cgroup_mutex is not dropped, no new css_set can be put into play and |
2139 | * into play and the preloaded css_sets are guaranteed to cover all | 2178 | * the preloaded css_sets are guaranteed to cover all migrations. |
2140 | * migrations. | ||
2141 | */ | 2179 | */ |
2142 | static void cgroup_migrate_add_src(struct css_set *src_cset, | 2180 | static void cgroup_migrate_add_src(struct css_set *src_cset, |
2143 | struct cgroup *dst_cgrp, | 2181 | struct cgroup *dst_cgrp, |
@@ -2240,7 +2278,7 @@ err: | |||
2240 | * @threadgroup: whether @leader points to the whole process or a single task | 2278 | * @threadgroup: whether @leader points to the whole process or a single task |
2241 | * | 2279 | * |
2242 | * Migrate a process or task denoted by @leader to @cgrp. If migrating a | 2280 | * Migrate a process or task denoted by @leader to @cgrp. If migrating a |
2243 | * process, the caller must be holding cgroup_threadgroup_rwsem. The | 2281 | * process, the caller must be holding threadgroup_lock of @leader. The |
2244 | * caller is also responsible for invoking cgroup_migrate_add_src() and | 2282 | * caller is also responsible for invoking cgroup_migrate_add_src() and |
2245 | * cgroup_migrate_prepare_dst() on the targets before invoking this | 2283 | * cgroup_migrate_prepare_dst() on the targets before invoking this |
2246 | * function and following up with cgroup_migrate_finish(). | 2284 | * function and following up with cgroup_migrate_finish(). |
@@ -2368,7 +2406,7 @@ out_release_tset: | |||
2368 | * @leader: the task or the leader of the threadgroup to be attached | 2406 | * @leader: the task or the leader of the threadgroup to be attached |
2369 | * @threadgroup: attach the whole threadgroup? | 2407 | * @threadgroup: attach the whole threadgroup? |
2370 | * | 2408 | * |
2371 | * Call holding cgroup_mutex and cgroup_threadgroup_rwsem. | 2409 | * Call holding cgroup_mutex and threadgroup_lock of @leader. |
2372 | */ | 2410 | */ |
2373 | static int cgroup_attach_task(struct cgroup *dst_cgrp, | 2411 | static int cgroup_attach_task(struct cgroup *dst_cgrp, |
2374 | struct task_struct *leader, bool threadgroup) | 2412 | struct task_struct *leader, bool threadgroup) |
@@ -2460,13 +2498,14 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, | |||
2460 | if (!cgrp) | 2498 | if (!cgrp) |
2461 | return -ENODEV; | 2499 | return -ENODEV; |
2462 | 2500 | ||
2463 | percpu_down_write(&cgroup_threadgroup_rwsem); | 2501 | retry_find_task: |
2464 | rcu_read_lock(); | 2502 | rcu_read_lock(); |
2465 | if (pid) { | 2503 | if (pid) { |
2466 | tsk = find_task_by_vpid(pid); | 2504 | tsk = find_task_by_vpid(pid); |
2467 | if (!tsk) { | 2505 | if (!tsk) { |
2506 | rcu_read_unlock(); | ||
2468 | ret = -ESRCH; | 2507 | ret = -ESRCH; |
2469 | goto out_unlock_rcu; | 2508 | goto out_unlock_cgroup; |
2470 | } | 2509 | } |
2471 | } else { | 2510 | } else { |
2472 | tsk = current; | 2511 | tsk = current; |
@@ -2482,23 +2521,37 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, | |||
2482 | */ | 2521 | */ |
2483 | if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { | 2522 | if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { |
2484 | ret = -EINVAL; | 2523 | ret = -EINVAL; |
2485 | goto out_unlock_rcu; | 2524 | rcu_read_unlock(); |
2525 | goto out_unlock_cgroup; | ||
2486 | } | 2526 | } |
2487 | 2527 | ||
2488 | get_task_struct(tsk); | 2528 | get_task_struct(tsk); |
2489 | rcu_read_unlock(); | 2529 | rcu_read_unlock(); |
2490 | 2530 | ||
2531 | threadgroup_lock(tsk); | ||
2532 | if (threadgroup) { | ||
2533 | if (!thread_group_leader(tsk)) { | ||
2534 | /* | ||
2535 | * a race with de_thread from another thread's exec() | ||
2536 | * may strip us of our leadership, if this happens, | ||
2537 | * there is no choice but to throw this task away and | ||
2538 | * try again; this is | ||
2539 | * "double-double-toil-and-trouble-check locking". | ||
2540 | */ | ||
2541 | threadgroup_unlock(tsk); | ||
2542 | put_task_struct(tsk); | ||
2543 | goto retry_find_task; | ||
2544 | } | ||
2545 | } | ||
2546 | |||
2491 | ret = cgroup_procs_write_permission(tsk, cgrp, of); | 2547 | ret = cgroup_procs_write_permission(tsk, cgrp, of); |
2492 | if (!ret) | 2548 | if (!ret) |
2493 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); | 2549 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); |
2494 | 2550 | ||
2495 | put_task_struct(tsk); | 2551 | threadgroup_unlock(tsk); |
2496 | goto out_unlock_threadgroup; | ||
2497 | 2552 | ||
2498 | out_unlock_rcu: | 2553 | put_task_struct(tsk); |
2499 | rcu_read_unlock(); | 2554 | out_unlock_cgroup: |
2500 | out_unlock_threadgroup: | ||
2501 | percpu_up_write(&cgroup_threadgroup_rwsem); | ||
2502 | cgroup_kn_unlock(of->kn); | 2555 | cgroup_kn_unlock(of->kn); |
2503 | return ret ?: nbytes; | 2556 | return ret ?: nbytes; |
2504 | } | 2557 | } |
@@ -2643,8 +2696,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2643 | 2696 | ||
2644 | lockdep_assert_held(&cgroup_mutex); | 2697 | lockdep_assert_held(&cgroup_mutex); |
2645 | 2698 | ||
2646 | percpu_down_write(&cgroup_threadgroup_rwsem); | ||
2647 | |||
2648 | /* look up all csses currently attached to @cgrp's subtree */ | 2699 | /* look up all csses currently attached to @cgrp's subtree */ |
2649 | down_read(&css_set_rwsem); | 2700 | down_read(&css_set_rwsem); |
2650 | css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { | 2701 | css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { |
@@ -2700,8 +2751,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2700 | goto out_finish; | 2751 | goto out_finish; |
2701 | last_task = task; | 2752 | last_task = task; |
2702 | 2753 | ||
2754 | threadgroup_lock(task); | ||
2755 | /* raced against de_thread() from another thread? */ | ||
2756 | if (!thread_group_leader(task)) { | ||
2757 | threadgroup_unlock(task); | ||
2758 | put_task_struct(task); | ||
2759 | continue; | ||
2760 | } | ||
2761 | |||
2703 | ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); | 2762 | ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); |
2704 | 2763 | ||
2764 | threadgroup_unlock(task); | ||
2705 | put_task_struct(task); | 2765 | put_task_struct(task); |
2706 | 2766 | ||
2707 | if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) | 2767 | if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) |
@@ -2711,7 +2771,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2711 | 2771 | ||
2712 | out_finish: | 2772 | out_finish: |
2713 | cgroup_migrate_finish(&preloaded_csets); | 2773 | cgroup_migrate_finish(&preloaded_csets); |
2714 | percpu_up_write(&cgroup_threadgroup_rwsem); | ||
2715 | return ret; | 2774 | return ret; |
2716 | } | 2775 | } |
2717 | 2776 | ||
@@ -5024,7 +5083,6 @@ int __init cgroup_init(void) | |||
5024 | unsigned long key; | 5083 | unsigned long key; |
5025 | int ssid, err; | 5084 | int ssid, err; |
5026 | 5085 | ||
5027 | BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); | ||
5028 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); | 5086 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); |
5029 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); | 5087 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); |
5030 | 5088 | ||
diff --git a/kernel/cpu_pm.c b/kernel/cpu_pm.c
index 9656a3c36503..009cc9a17d95 100644
--- a/kernel/cpu_pm.c
+++ b/kernel/cpu_pm.c
@@ -180,7 +180,7 @@ EXPORT_SYMBOL_GPL(cpu_cluster_pm_enter); | |||
180 | * low power state that may have caused some blocks in the same power domain | 180 | * low power state that may have caused some blocks in the same power domain |
181 | * to reset. | 181 | * to reset. |
182 | * | 182 | * |
183 | * Must be called after cpu_pm_exit has been called on all cpus in the power | 183 | * Must be called after cpu_cluster_pm_enter has been called for the power |
184 | * domain, and before cpu_pm_exit has been called on any cpu in the power | 184 | * domain, and before cpu_pm_exit has been called on any cpu in the power |
185 | * domain. Notified drivers can include VFP co-processor, interrupt controller | 185 | * domain. Notified drivers can include VFP co-processor, interrupt controller |
186 | * and its PM extensions, local CPU timers context save/restore which | 186 | * and its PM extensions, local CPU timers context save/restore which |
diff --git a/kernel/fork.c b/kernel/fork.c
index 7d5f0f118a63..2845623fb582 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1149,6 +1149,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
1149 | tty_audit_fork(sig); | 1149 | tty_audit_fork(sig); |
1150 | sched_autogroup_fork(sig); | 1150 | sched_autogroup_fork(sig); |
1151 | 1151 | ||
1152 | #ifdef CONFIG_CGROUPS | ||
1153 | init_rwsem(&sig->group_rwsem); | ||
1154 | #endif | ||
1155 | |||
1152 | sig->oom_score_adj = current->signal->oom_score_adj; | 1156 | sig->oom_score_adj = current->signal->oom_score_adj; |
1153 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; | 1157 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; |
1154 | 1158 | ||
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6e40a9539763..e28169dd1c36 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -83,7 +83,7 @@ int irq_set_handler_data(unsigned int irq, void *data) | |||
83 | 83 | ||
84 | if (!desc) | 84 | if (!desc) |
85 | return -EINVAL; | 85 | return -EINVAL; |
86 | desc->irq_data.handler_data = data; | 86 | desc->irq_common_data.handler_data = data; |
87 | irq_put_desc_unlock(desc, flags); | 87 | irq_put_desc_unlock(desc, flags); |
88 | return 0; | 88 | return 0; |
89 | } | 89 | } |
@@ -105,7 +105,7 @@ int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, | |||
105 | 105 | ||
106 | if (!desc) | 106 | if (!desc) |
107 | return -EINVAL; | 107 | return -EINVAL; |
108 | desc->irq_data.msi_desc = entry; | 108 | desc->irq_common_data.msi_desc = entry; |
109 | if (entry && !irq_offset) | 109 | if (entry && !irq_offset) |
110 | entry->irq = irq_base; | 110 | entry->irq = irq_base; |
111 | irq_put_desc_unlock(desc, flags); | 111 | irq_put_desc_unlock(desc, flags); |
@@ -372,7 +372,6 @@ static bool irq_may_run(struct irq_desc *desc) | |||
372 | 372 | ||
373 | /** | 373 | /** |
374 | * handle_simple_irq - Simple and software-decoded IRQs. | 374 | * handle_simple_irq - Simple and software-decoded IRQs. |
375 | * @irq: the interrupt number | ||
376 | * @desc: the interrupt description structure for this irq | 375 | * @desc: the interrupt description structure for this irq |
377 | * | 376 | * |
378 | * Simple interrupts are either sent from a demultiplexing interrupt | 377 | * Simple interrupts are either sent from a demultiplexing interrupt |
@@ -382,8 +381,7 @@ static bool irq_may_run(struct irq_desc *desc) | |||
382 | * Note: The caller is expected to handle the ack, clear, mask and | 381 | * Note: The caller is expected to handle the ack, clear, mask and |
383 | * unmask issues if necessary. | 382 | * unmask issues if necessary. |
384 | */ | 383 | */ |
385 | void | 384 | void handle_simple_irq(struct irq_desc *desc) |
386 | handle_simple_irq(unsigned int irq, struct irq_desc *desc) | ||
387 | { | 385 | { |
388 | raw_spin_lock(&desc->lock); | 386 | raw_spin_lock(&desc->lock); |
389 | 387 | ||
@@ -425,7 +423,6 @@ static void cond_unmask_irq(struct irq_desc *desc) | |||
425 | 423 | ||
426 | /** | 424 | /** |
427 | * handle_level_irq - Level type irq handler | 425 | * handle_level_irq - Level type irq handler |
428 | * @irq: the interrupt number | ||
429 | * @desc: the interrupt description structure for this irq | 426 | * @desc: the interrupt description structure for this irq |
430 | * | 427 | * |
431 | * Level type interrupts are active as long as the hardware line has | 428 | * Level type interrupts are active as long as the hardware line has |
@@ -433,8 +430,7 @@ static void cond_unmask_irq(struct irq_desc *desc) | |||
433 | * it after the associated handler has acknowledged the device, so the | 430 | * it after the associated handler has acknowledged the device, so the |
434 | * interrupt line is back to inactive. | 431 | * interrupt line is back to inactive. |
435 | */ | 432 | */ |
436 | void | 433 | void handle_level_irq(struct irq_desc *desc) |
437 | handle_level_irq(unsigned int irq, struct irq_desc *desc) | ||
438 | { | 434 | { |
439 | raw_spin_lock(&desc->lock); | 435 | raw_spin_lock(&desc->lock); |
440 | mask_ack_irq(desc); | 436 | mask_ack_irq(desc); |
@@ -496,7 +492,6 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) | |||
496 | 492 | ||
497 | /** | 493 | /** |
498 | * handle_fasteoi_irq - irq handler for transparent controllers | 494 | * handle_fasteoi_irq - irq handler for transparent controllers |
499 | * @irq: the interrupt number | ||
500 | * @desc: the interrupt description structure for this irq | 495 | * @desc: the interrupt description structure for this irq |
501 | * | 496 | * |
502 | * Only a single callback will be issued to the chip: an ->eoi() | 497 | * Only a single callback will be issued to the chip: an ->eoi() |
@@ -504,8 +499,7 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) | |||
504 | * for modern forms of interrupt handlers, which handle the flow | 499 | * for modern forms of interrupt handlers, which handle the flow |
505 | * details in hardware, transparently. | 500 | * details in hardware, transparently. |
506 | */ | 501 | */ |
507 | void | 502 | void handle_fasteoi_irq(struct irq_desc *desc) |
508 | handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | ||
509 | { | 503 | { |
510 | struct irq_chip *chip = desc->irq_data.chip; | 504 | struct irq_chip *chip = desc->irq_data.chip; |
511 | 505 | ||
@@ -546,7 +540,6 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq); | |||
546 | 540 | ||
547 | /** | 541 | /** |
548 | * handle_edge_irq - edge type IRQ handler | 542 | * handle_edge_irq - edge type IRQ handler |
549 | * @irq: the interrupt number | ||
550 | * @desc: the interrupt description structure for this irq | 543 | * @desc: the interrupt description structure for this irq |
551 | * | 544 | * |
552 | * Interrupt occures on the falling and/or rising edge of a hardware | 545 | * Interrupt occures on the falling and/or rising edge of a hardware |
@@ -560,8 +553,7 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq); | |||
560 | * the handler was running. If all pending interrupts are handled, the | 553 | * the handler was running. If all pending interrupts are handled, the |
561 | * loop is left. | 554 | * loop is left. |
562 | */ | 555 | */ |
563 | void | 556 | void handle_edge_irq(struct irq_desc *desc) |
564 | handle_edge_irq(unsigned int irq, struct irq_desc *desc) | ||
565 | { | 557 | { |
566 | raw_spin_lock(&desc->lock); | 558 | raw_spin_lock(&desc->lock); |
567 | 559 | ||
@@ -618,13 +610,12 @@ EXPORT_SYMBOL(handle_edge_irq); | |||
618 | #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER | 610 | #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER |
619 | /** | 611 | /** |
620 | * handle_edge_eoi_irq - edge eoi type IRQ handler | 612 | * handle_edge_eoi_irq - edge eoi type IRQ handler |
621 | * @irq: the interrupt number | ||
622 | * @desc: the interrupt description structure for this irq | 613 | * @desc: the interrupt description structure for this irq |
623 | * | 614 | * |
624 | * Similar as the above handle_edge_irq, but using eoi and w/o the | 615 | * Similar as the above handle_edge_irq, but using eoi and w/o the |
625 | * mask/unmask logic. | 616 | * mask/unmask logic. |
626 | */ | 617 | */ |
627 | void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc) | 618 | void handle_edge_eoi_irq(struct irq_desc *desc) |
628 | { | 619 | { |
629 | struct irq_chip *chip = irq_desc_get_chip(desc); | 620 | struct irq_chip *chip = irq_desc_get_chip(desc); |
630 | 621 | ||
@@ -665,13 +656,11 @@ out_eoi: | |||
665 | 656 | ||
666 | /** | 657 | /** |
667 | * handle_percpu_irq - Per CPU local irq handler | 658 | * handle_percpu_irq - Per CPU local irq handler |
668 | * @irq: the interrupt number | ||
669 | * @desc: the interrupt description structure for this irq | 659 | * @desc: the interrupt description structure for this irq |
670 | * | 660 | * |
671 | * Per CPU interrupts on SMP machines without locking requirements | 661 | * Per CPU interrupts on SMP machines without locking requirements |
672 | */ | 662 | */ |
673 | void | 663 | void handle_percpu_irq(struct irq_desc *desc) |
674 | handle_percpu_irq(unsigned int irq, struct irq_desc *desc) | ||
675 | { | 664 | { |
676 | struct irq_chip *chip = irq_desc_get_chip(desc); | 665 | struct irq_chip *chip = irq_desc_get_chip(desc); |
677 | 666 | ||
@@ -688,7 +677,6 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) | |||
688 | 677 | ||
689 | /** | 678 | /** |
690 | * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids | 679 | * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids |
691 | * @irq: the interrupt number | ||
692 | * @desc: the interrupt description structure for this irq | 680 | * @desc: the interrupt description structure for this irq |
693 | * | 681 | * |
694 | * Per CPU interrupts on SMP machines without locking requirements. Same as | 682 | * Per CPU interrupts on SMP machines without locking requirements. Same as |
@@ -698,11 +686,12 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) | |||
698 | * contain the real device id for the cpu on which this handler is | 686 | * contain the real device id for the cpu on which this handler is |
699 | * called | 687 | * called |
700 | */ | 688 | */ |
701 | void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc) | 689 | void handle_percpu_devid_irq(struct irq_desc *desc) |
702 | { | 690 | { |
703 | struct irq_chip *chip = irq_desc_get_chip(desc); | 691 | struct irq_chip *chip = irq_desc_get_chip(desc); |
704 | struct irqaction *action = desc->action; | 692 | struct irqaction *action = desc->action; |
705 | void *dev_id = raw_cpu_ptr(action->percpu_dev_id); | 693 | void *dev_id = raw_cpu_ptr(action->percpu_dev_id); |
694 | unsigned int irq = irq_desc_get_irq(desc); | ||
706 | irqreturn_t res; | 695 | irqreturn_t res; |
707 | 696 | ||
708 | kstat_incr_irqs_this_cpu(desc); | 697 | kstat_incr_irqs_this_cpu(desc); |
@@ -796,7 +785,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, | |||
796 | return; | 785 | return; |
797 | 786 | ||
798 | __irq_do_set_handler(desc, handle, 1, NULL); | 787 | __irq_do_set_handler(desc, handle, 1, NULL); |
799 | desc->irq_data.handler_data = data; | 788 | desc->irq_common_data.handler_data = data; |
800 | 789 | ||
801 | irq_put_desc_busunlock(desc, flags); | 790 | irq_put_desc_busunlock(desc, flags); |
802 | } | 791 | } |
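Note: the chip.c hunks above drop the redundant irq-number argument from the flow-handler prototypes; a handler that still needs the number recovers it from the descriptor, as handle_percpu_devid_irq() now does. A hedged sketch of a chained demux handler under the new one-argument prototype (everything prefixed "foo" is invented):

/* Sketch only: chained handler using the new prototype. */
static void foo_demux_handler(struct irq_desc *desc)
{
	struct irq_chip *chip = irq_desc_get_chip(desc);
	unsigned int parent_irq = irq_desc_get_irq(desc);	/* recovered from desc */

	chained_irq_enter(chip, desc);
	pr_debug("demuxing parent irq %u\n", parent_irq);
	/* read foo's status register and call generic_handle_irq() per set bit */
	chained_irq_exit(chip, desc);
}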
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index b6eeea8a80c5..de41a68fc038 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -27,8 +27,10 @@ | |||
27 | * | 27 | * |
28 | * Handles spurious and unhandled IRQ's. It also prints a debugmessage. | 28 | * Handles spurious and unhandled IRQ's. It also prints a debugmessage. |
29 | */ | 29 | */ |
30 | void handle_bad_irq(unsigned int irq, struct irq_desc *desc) | 30 | void handle_bad_irq(struct irq_desc *desc) |
31 | { | 31 | { |
32 | unsigned int irq = irq_desc_get_irq(desc); | ||
33 | |||
32 | print_irq_desc(irq, desc); | 34 | print_irq_desc(irq, desc); |
33 | kstat_incr_irqs_this_cpu(desc); | 35 | kstat_incr_irqs_this_cpu(desc); |
34 | ack_bad_irq(irq); | 36 | ack_bad_irq(irq); |
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index eee4b385cffb..5ef0c2dbe930 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -194,7 +194,7 @@ static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) | |||
194 | 194 | ||
195 | static inline int irq_desc_get_node(struct irq_desc *desc) | 195 | static inline int irq_desc_get_node(struct irq_desc *desc) |
196 | { | 196 | { |
197 | return irq_data_get_node(&desc->irq_data); | 197 | return irq_common_data_get_node(&desc->irq_common_data); |
198 | } | 198 | } |
199 | 199 | ||
200 | #ifdef CONFIG_PM_SLEEP | 200 | #ifdef CONFIG_PM_SLEEP |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 0a2a4b697bcb..239e2ae2c947 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -38,12 +38,13 @@ static void __init init_irq_default_affinity(void) | |||
38 | #ifdef CONFIG_SMP | 38 | #ifdef CONFIG_SMP |
39 | static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) | 39 | static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) |
40 | { | 40 | { |
41 | if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node)) | 41 | if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity, |
42 | gfp, node)) | ||
42 | return -ENOMEM; | 43 | return -ENOMEM; |
43 | 44 | ||
44 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 45 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
45 | if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { | 46 | if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { |
46 | free_cpumask_var(desc->irq_data.affinity); | 47 | free_cpumask_var(desc->irq_common_data.affinity); |
47 | return -ENOMEM; | 48 | return -ENOMEM; |
48 | } | 49 | } |
49 | #endif | 50 | #endif |
@@ -52,11 +53,13 @@ static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) | |||
52 | 53 | ||
53 | static void desc_smp_init(struct irq_desc *desc, int node) | 54 | static void desc_smp_init(struct irq_desc *desc, int node) |
54 | { | 55 | { |
55 | desc->irq_data.node = node; | 56 | cpumask_copy(desc->irq_common_data.affinity, irq_default_affinity); |
56 | cpumask_copy(desc->irq_data.affinity, irq_default_affinity); | ||
57 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 57 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
58 | cpumask_clear(desc->pending_mask); | 58 | cpumask_clear(desc->pending_mask); |
59 | #endif | 59 | #endif |
60 | #ifdef CONFIG_NUMA | ||
61 | desc->irq_common_data.node = node; | ||
62 | #endif | ||
60 | } | 63 | } |
61 | 64 | ||
62 | #else | 65 | #else |
@@ -70,12 +73,13 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, | |||
70 | { | 73 | { |
71 | int cpu; | 74 | int cpu; |
72 | 75 | ||
76 | desc->irq_common_data.handler_data = NULL; | ||
77 | desc->irq_common_data.msi_desc = NULL; | ||
78 | |||
73 | desc->irq_data.common = &desc->irq_common_data; | 79 | desc->irq_data.common = &desc->irq_common_data; |
74 | desc->irq_data.irq = irq; | 80 | desc->irq_data.irq = irq; |
75 | desc->irq_data.chip = &no_irq_chip; | 81 | desc->irq_data.chip = &no_irq_chip; |
76 | desc->irq_data.chip_data = NULL; | 82 | desc->irq_data.chip_data = NULL; |
77 | desc->irq_data.handler_data = NULL; | ||
78 | desc->irq_data.msi_desc = NULL; | ||
79 | irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); | 83 | irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); |
80 | irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); | 84 | irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); |
81 | desc->handle_irq = handle_bad_irq; | 85 | desc->handle_irq = handle_bad_irq; |
@@ -121,7 +125,7 @@ static void free_masks(struct irq_desc *desc) | |||
121 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 125 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
122 | free_cpumask_var(desc->pending_mask); | 126 | free_cpumask_var(desc->pending_mask); |
123 | #endif | 127 | #endif |
124 | free_cpumask_var(desc->irq_data.affinity); | 128 | free_cpumask_var(desc->irq_common_data.affinity); |
125 | } | 129 | } |
126 | #else | 130 | #else |
127 | static inline void free_masks(struct irq_desc *desc) { } | 131 | static inline void free_masks(struct irq_desc *desc) { } |
@@ -343,7 +347,7 @@ int generic_handle_irq(unsigned int irq) | |||
343 | 347 | ||
344 | if (!desc) | 348 | if (!desc) |
345 | return -EINVAL; | 349 | return -EINVAL; |
346 | generic_handle_irq_desc(irq, desc); | 350 | generic_handle_irq_desc(desc); |
347 | return 0; | 351 | return 0; |
348 | } | 352 | } |
349 | EXPORT_SYMBOL_GPL(generic_handle_irq); | 353 | EXPORT_SYMBOL_GPL(generic_handle_irq); |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 79baaf8a7813..dc9d27c0c158 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -844,7 +844,6 @@ static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain, | |||
844 | child->parent_data = irq_data; | 844 | child->parent_data = irq_data; |
845 | irq_data->irq = child->irq; | 845 | irq_data->irq = child->irq; |
846 | irq_data->common = child->common; | 846 | irq_data->common = child->common; |
847 | irq_data->node = child->node; | ||
848 | irq_data->domain = domain; | 847 | irq_data->domain = domain; |
849 | } | 848 | } |
850 | 849 | ||
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ad1b064f94fe..f9a59f6cabd2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -192,7 +192,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
192 | switch (ret) { | 192 | switch (ret) { |
193 | case IRQ_SET_MASK_OK: | 193 | case IRQ_SET_MASK_OK: |
194 | case IRQ_SET_MASK_OK_DONE: | 194 | case IRQ_SET_MASK_OK_DONE: |
195 | cpumask_copy(data->affinity, mask); | 195 | cpumask_copy(desc->irq_common_data.affinity, mask); |
196 | case IRQ_SET_MASK_OK_NOCOPY: | 196 | case IRQ_SET_MASK_OK_NOCOPY: |
197 | irq_set_thread_affinity(desc); | 197 | irq_set_thread_affinity(desc); |
198 | ret = 0; | 198 | ret = 0; |
@@ -304,7 +304,7 @@ static void irq_affinity_notify(struct work_struct *work) | |||
304 | if (irq_move_pending(&desc->irq_data)) | 304 | if (irq_move_pending(&desc->irq_data)) |
305 | irq_get_pending(cpumask, desc); | 305 | irq_get_pending(cpumask, desc); |
306 | else | 306 | else |
307 | cpumask_copy(cpumask, desc->irq_data.affinity); | 307 | cpumask_copy(cpumask, desc->irq_common_data.affinity); |
308 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 308 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
309 | 309 | ||
310 | notify->notify(notify, cpumask); | 310 | notify->notify(notify, cpumask); |
@@ -375,9 +375,9 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) | |||
375 | * one of the targets is online. | 375 | * one of the targets is online. |
376 | */ | 376 | */ |
377 | if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { | 377 | if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { |
378 | if (cpumask_intersects(desc->irq_data.affinity, | 378 | if (cpumask_intersects(desc->irq_common_data.affinity, |
379 | cpu_online_mask)) | 379 | cpu_online_mask)) |
380 | set = desc->irq_data.affinity; | 380 | set = desc->irq_common_data.affinity; |
381 | else | 381 | else |
382 | irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); | 382 | irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); |
383 | } | 383 | } |
@@ -829,8 +829,8 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) | |||
829 | * This code is triggered unconditionally. Check the affinity | 829 | * This code is triggered unconditionally. Check the affinity |
830 | * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. | 830 | * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. |
831 | */ | 831 | */ |
832 | if (desc->irq_data.affinity) | 832 | if (desc->irq_common_data.affinity) |
833 | cpumask_copy(mask, desc->irq_data.affinity); | 833 | cpumask_copy(mask, desc->irq_common_data.affinity); |
834 | else | 834 | else |
835 | valid = false; | 835 | valid = false; |
836 | raw_spin_unlock_irq(&desc->lock); | 836 | raw_spin_unlock_irq(&desc->lock); |
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0e97c142ce40..e3a8c9577ba6 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -39,7 +39,7 @@ static struct proc_dir_entry *root_irq_dir; | |||
39 | static int show_irq_affinity(int type, struct seq_file *m, void *v) | 39 | static int show_irq_affinity(int type, struct seq_file *m, void *v) |
40 | { | 40 | { |
41 | struct irq_desc *desc = irq_to_desc((long)m->private); | 41 | struct irq_desc *desc = irq_to_desc((long)m->private); |
42 | const struct cpumask *mask = desc->irq_data.affinity; | 42 | const struct cpumask *mask = desc->irq_common_data.affinity; |
43 | 43 | ||
44 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 44 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
45 | if (irqd_is_setaffinity_pending(&desc->irq_data)) | 45 | if (irqd_is_setaffinity_pending(&desc->irq_data)) |
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index dd95f44f99b2..b86886beee4f 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -38,7 +38,7 @@ static void resend_irqs(unsigned long arg) | |||
38 | clear_bit(irq, irqs_resend); | 38 | clear_bit(irq, irqs_resend); |
39 | desc = irq_to_desc(irq); | 39 | desc = irq_to_desc(irq); |
40 | local_irq_disable(); | 40 | local_irq_disable(); |
41 | desc->handle_irq(irq, desc); | 41 | desc->handle_irq(desc); |
42 | local_irq_enable(); | 42 | local_irq_enable(); |
43 | } | 43 | } |
44 | } | 44 | } |
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 337c8818541d..87e9ce6a63c5 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -289,7 +289,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) | |||
289 | if (pv_enabled()) | 289 | if (pv_enabled()) |
290 | goto queue; | 290 | goto queue; |
291 | 291 | ||
292 | if (virt_queued_spin_lock(lock)) | 292 | if (virt_spin_lock(lock)) |
293 | return; | 293 | return; |
294 | 294 | ||
295 | /* | 295 | /* |
diff --git a/kernel/membarrier.c b/kernel/membarrier.c
new file mode 100644
index 000000000000..536c727a56e9
--- /dev/null
+++ b/kernel/membarrier.c
@@ -0,0 +1,66 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2010, 2015 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> | ||
3 | * | ||
4 | * membarrier system call | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | */ | ||
16 | |||
17 | #include <linux/syscalls.h> | ||
18 | #include <linux/membarrier.h> | ||
19 | |||
20 | /* | ||
21 | * Bitmask made from a "or" of all commands within enum membarrier_cmd, | ||
22 | * except MEMBARRIER_CMD_QUERY. | ||
23 | */ | ||
24 | #define MEMBARRIER_CMD_BITMASK (MEMBARRIER_CMD_SHARED) | ||
25 | |||
26 | /** | ||
27 | * sys_membarrier - issue memory barriers on a set of threads | ||
28 | * @cmd: Takes command values defined in enum membarrier_cmd. | ||
29 | * @flags: Currently needs to be 0. For future extensions. | ||
30 | * | ||
31 | * If this system call is not implemented, -ENOSYS is returned. If the | ||
32 | * command specified does not exist, or if the command argument is invalid, | ||
33 | * this system call returns -EINVAL. For a given command, with flags argument | ||
34 | * set to 0, this system call is guaranteed to always return the same value | ||
35 | * until reboot. | ||
36 | * | ||
37 | * All memory accesses performed in program order from each targeted thread | ||
38 | * is guaranteed to be ordered with respect to sys_membarrier(). If we use | ||
39 | * the semantic "barrier()" to represent a compiler barrier forcing memory | ||
40 | * accesses to be performed in program order across the barrier, and | ||
41 | * smp_mb() to represent explicit memory barriers forcing full memory | ||
42 | * ordering across the barrier, we have the following ordering table for | ||
43 | * each pair of barrier(), sys_membarrier() and smp_mb(): | ||
44 | * | ||
45 | * The pair ordering is detailed as (O: ordered, X: not ordered): | ||
46 | * | ||
47 | * barrier() smp_mb() sys_membarrier() | ||
48 | * barrier() X X O | ||
49 | * smp_mb() X O O | ||
50 | * sys_membarrier() O O O | ||
51 | */ | ||
52 | SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) | ||
53 | { | ||
54 | if (unlikely(flags)) | ||
55 | return -EINVAL; | ||
56 | switch (cmd) { | ||
57 | case MEMBARRIER_CMD_QUERY: | ||
58 | return MEMBARRIER_CMD_BITMASK; | ||
59 | case MEMBARRIER_CMD_SHARED: | ||
60 | if (num_online_cpus() > 1) | ||
61 | synchronize_sched(); | ||
62 | return 0; | ||
63 | default: | ||
64 | return -EINVAL; | ||
65 | } | ||
66 | } | ||
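Note: the kerneldoc above defines the kernel side of the new syscall; a minimal user-space probe for it might look like the sketch below (assuming the toolchain headers already provide __NR_membarrier and <linux/membarrier.h>, which are added elsewhere in this series):

/* Illustrative user-space test, not part of the patch. */
#include <linux/membarrier.h>	/* MEMBARRIER_CMD_QUERY, MEMBARRIER_CMD_SHARED */
#include <sys/syscall.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	/* Ask which commands this kernel supports; a bitmask, or -1 with ENOSYS. */
	long mask = syscall(__NR_membarrier, MEMBARRIER_CMD_QUERY, 0);

	if (mask < 0) {
		perror("membarrier");
		return 1;
	}
	if (mask & MEMBARRIER_CMD_SHARED) {
		/* Acts like smp_mb() with respect to every other running thread. */
		syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0);
		puts("system-wide memory barrier issued");
	}
	return 0;
}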
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3595403921bd..2f9c92884817 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -621,18 +621,21 @@ int get_nohz_timer_target(void) | |||
621 | int i, cpu = smp_processor_id(); | 621 | int i, cpu = smp_processor_id(); |
622 | struct sched_domain *sd; | 622 | struct sched_domain *sd; |
623 | 623 | ||
624 | if (!idle_cpu(cpu)) | 624 | if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) |
625 | return cpu; | 625 | return cpu; |
626 | 626 | ||
627 | rcu_read_lock(); | 627 | rcu_read_lock(); |
628 | for_each_domain(cpu, sd) { | 628 | for_each_domain(cpu, sd) { |
629 | for_each_cpu(i, sched_domain_span(sd)) { | 629 | for_each_cpu(i, sched_domain_span(sd)) { |
630 | if (!idle_cpu(i)) { | 630 | if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) { |
631 | cpu = i; | 631 | cpu = i; |
632 | goto unlock; | 632 | goto unlock; |
633 | } | 633 | } |
634 | } | 634 | } |
635 | } | 635 | } |
636 | |||
637 | if (!is_housekeeping_cpu(cpu)) | ||
638 | cpu = housekeeping_any_cpu(); | ||
636 | unlock: | 639 | unlock: |
637 | rcu_read_unlock(); | 640 | rcu_read_unlock(); |
638 | return cpu; | 641 | return cpu; |
@@ -2666,13 +2669,20 @@ unsigned long nr_running(void) | |||
2666 | 2669 | ||
2667 | /* | 2670 | /* |
2668 | * Check if only the current task is running on the cpu. | 2671 | * Check if only the current task is running on the cpu. |
2672 | * | ||
2673 | * Caution: this function does not check that the caller has disabled | ||
2674 | * preemption, thus the result might have a time-of-check-to-time-of-use | ||
2675 | * race. The caller is responsible to use it correctly, for example: | ||
2676 | * | ||
2677 | * - from a non-preemptable section (of course) | ||
2678 | * | ||
2679 | * - from a thread that is bound to a single CPU | ||
2680 | * | ||
2681 | * - in a loop with very short iterations (e.g. a polling loop) | ||
2669 | */ | 2682 | */ |
2670 | bool single_task_running(void) | 2683 | bool single_task_running(void) |
2671 | { | 2684 | { |
2672 | if (cpu_rq(smp_processor_id())->nr_running == 1) | 2685 | return raw_rq()->nr_running == 1; |
2673 | return true; | ||
2674 | else | ||
2675 | return false; | ||
2676 | } | 2686 | } |
2677 | EXPORT_SYMBOL(single_task_running); | 2687 | EXPORT_SYMBOL(single_task_running); |
2678 | 2688 | ||
@@ -5178,24 +5188,47 @@ static void migrate_tasks(struct rq *dead_rq) | |||
5178 | break; | 5188 | break; |
5179 | 5189 | ||
5180 | /* | 5190 | /* |
5181 | * Ensure rq->lock covers the entire task selection | 5191 | * pick_next_task assumes pinned rq->lock. |
5182 | * until the migration. | ||
5183 | */ | 5192 | */ |
5184 | lockdep_pin_lock(&rq->lock); | 5193 | lockdep_pin_lock(&rq->lock); |
5185 | next = pick_next_task(rq, &fake_task); | 5194 | next = pick_next_task(rq, &fake_task); |
5186 | BUG_ON(!next); | 5195 | BUG_ON(!next); |
5187 | next->sched_class->put_prev_task(rq, next); | 5196 | next->sched_class->put_prev_task(rq, next); |
5188 | 5197 | ||
5198 | /* | ||
5199 | * Rules for changing task_struct::cpus_allowed are holding | ||
5200 | * both pi_lock and rq->lock, such that holding either | ||
5201 | * stabilizes the mask. | ||
5202 | * | ||
5203 | * Drop rq->lock is not quite as disastrous as it usually is | ||
5204 | * because !cpu_active at this point, which means load-balance | ||
5205 | * will not interfere. Also, stop-machine. | ||
5206 | */ | ||
5207 | lockdep_unpin_lock(&rq->lock); | ||
5208 | raw_spin_unlock(&rq->lock); | ||
5209 | raw_spin_lock(&next->pi_lock); | ||
5210 | raw_spin_lock(&rq->lock); | ||
5211 | |||
5212 | /* | ||
5213 | * Since we're inside stop-machine, _nothing_ should have | ||
5214 | * changed the task, WARN if weird stuff happened, because in | ||
5215 | * that case the above rq->lock drop is a fail too. | ||
5216 | */ | ||
5217 | if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { | ||
5218 | raw_spin_unlock(&next->pi_lock); | ||
5219 | continue; | ||
5220 | } | ||
5221 | |||
5189 | /* Find suitable destination for @next, with force if needed. */ | 5222 | /* Find suitable destination for @next, with force if needed. */ |
5190 | dest_cpu = select_fallback_rq(dead_rq->cpu, next); | 5223 | dest_cpu = select_fallback_rq(dead_rq->cpu, next); |
5191 | 5224 | ||
5192 | lockdep_unpin_lock(&rq->lock); | ||
5193 | rq = __migrate_task(rq, next, dest_cpu); | 5225 | rq = __migrate_task(rq, next, dest_cpu); |
5194 | if (rq != dead_rq) { | 5226 | if (rq != dead_rq) { |
5195 | raw_spin_unlock(&rq->lock); | 5227 | raw_spin_unlock(&rq->lock); |
5196 | rq = dead_rq; | 5228 | rq = dead_rq; |
5197 | raw_spin_lock(&rq->lock); | 5229 | raw_spin_lock(&rq->lock); |
5198 | } | 5230 | } |
5231 | raw_spin_unlock(&next->pi_lock); | ||
5199 | } | 5232 | } |
5200 | 5233 | ||
5201 | rq->stop = stop; | 5234 | rq->stop = stop; |
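Note: the new comment on single_task_running() spells out its time-of-check/time-of-use caveat; the "loop with very short iterations" case it permits might look roughly like this (the foo_* names are invented, only the single_task_running() call is real):

/* Illustrative polling loop: spin only while this CPU has nothing else to
 * run; a racing wakeup is tolerated because the check repeats each pass. */
static bool foo_poll_done(struct foo_dev *dev)
{
	while (!foo_dev_done(dev)) {
		if (!single_task_running())
			return false;	/* another task became runnable: stop spinning */
		cpu_relax();
	}
	return true;
}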
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 272d9322bc5d..052e02672d12 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -106,10 +106,9 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr) | |||
106 | } | 106 | } |
107 | EXPORT_SYMBOL_GPL(__wake_up_locked); | 107 | EXPORT_SYMBOL_GPL(__wake_up_locked); |
108 | 108 | ||
109 | void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr, | 109 | void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) |
110 | void *key) | ||
111 | { | 110 | { |
112 | __wake_up_common(q, mode, nr, 0, key); | 111 | __wake_up_common(q, mode, 1, 0, key); |
113 | } | 112 | } |
114 | EXPORT_SYMBOL_GPL(__wake_up_locked_key); | 113 | EXPORT_SYMBOL_GPL(__wake_up_locked_key); |
115 | 114 | ||
@@ -284,7 +283,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, | |||
284 | if (!list_empty(&wait->task_list)) | 283 | if (!list_empty(&wait->task_list)) |
285 | list_del_init(&wait->task_list); | 284 | list_del_init(&wait->task_list); |
286 | else if (waitqueue_active(q)) | 285 | else if (waitqueue_active(q)) |
287 | __wake_up_locked_key(q, mode, 1, key); | 286 | __wake_up_locked_key(q, mode, key); |
288 | spin_unlock_irqrestore(&q->lock, flags); | 287 | spin_unlock_irqrestore(&q->lock, flags); |
289 | } | 288 | } |
290 | EXPORT_SYMBOL(abort_exclusive_wait); | 289 | EXPORT_SYMBOL(abort_exclusive_wait); |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 03c3875d9958..a02decf15583 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -245,3 +245,6 @@ cond_syscall(sys_bpf); | |||
245 | 245 | ||
246 | /* execveat */ | 246 | /* execveat */ |
247 | cond_syscall(sys_execveat); | 247 | cond_syscall(sys_execveat); |
248 | |||
249 | /* membarrier */ | ||
250 | cond_syscall(sys_membarrier); | ||
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 50eb107f1198..a9b76a40319e 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -97,20 +97,6 @@ EXPORT_SYMBOL_GPL(clockevent_delta2ns); | |||
97 | static int __clockevents_switch_state(struct clock_event_device *dev, | 97 | static int __clockevents_switch_state(struct clock_event_device *dev, |
98 | enum clock_event_state state) | 98 | enum clock_event_state state) |
99 | { | 99 | { |
100 | /* Transition with legacy set_mode() callback */ | ||
101 | if (dev->set_mode) { | ||
102 | /* Legacy callback doesn't support new modes */ | ||
103 | if (state > CLOCK_EVT_STATE_ONESHOT) | ||
104 | return -ENOSYS; | ||
105 | /* | ||
106 | * 'clock_event_state' and 'clock_event_mode' have 1-to-1 | ||
107 | * mapping until *_ONESHOT, and so a simple cast will work. | ||
108 | */ | ||
109 | dev->set_mode((enum clock_event_mode)state, dev); | ||
110 | dev->mode = (enum clock_event_mode)state; | ||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | if (dev->features & CLOCK_EVT_FEAT_DUMMY) | 100 | if (dev->features & CLOCK_EVT_FEAT_DUMMY) |
115 | return 0; | 101 | return 0; |
116 | 102 | ||
@@ -204,12 +190,8 @@ int clockevents_tick_resume(struct clock_event_device *dev) | |||
204 | { | 190 | { |
205 | int ret = 0; | 191 | int ret = 0; |
206 | 192 | ||
207 | if (dev->set_mode) { | 193 | if (dev->tick_resume) |
208 | dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); | ||
209 | dev->mode = CLOCK_EVT_MODE_RESUME; | ||
210 | } else if (dev->tick_resume) { | ||
211 | ret = dev->tick_resume(dev); | 194 | ret = dev->tick_resume(dev); |
212 | } | ||
213 | 195 | ||
214 | return ret; | 196 | return ret; |
215 | } | 197 | } |
@@ -460,26 +442,6 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) | |||
460 | } | 442 | } |
461 | EXPORT_SYMBOL_GPL(clockevents_unbind_device); | 443 | EXPORT_SYMBOL_GPL(clockevents_unbind_device); |
462 | 444 | ||
463 | /* Sanity check of state transition callbacks */ | ||
464 | static int clockevents_sanity_check(struct clock_event_device *dev) | ||
465 | { | ||
466 | /* Legacy set_mode() callback */ | ||
467 | if (dev->set_mode) { | ||
468 | /* We shouldn't be supporting new modes now */ | ||
469 | WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || | ||
470 | dev->set_state_shutdown || dev->tick_resume || | ||
471 | dev->set_state_oneshot_stopped); | ||
472 | |||
473 | BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); | ||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | if (dev->features & CLOCK_EVT_FEAT_DUMMY) | ||
478 | return 0; | ||
479 | |||
480 | return 0; | ||
481 | } | ||
482 | |||
483 | /** | 445 | /** |
484 | * clockevents_register_device - register a clock event device | 446 | * clockevents_register_device - register a clock event device |
485 | * @dev: device to register | 447 | * @dev: device to register |
@@ -488,8 +450,6 @@ void clockevents_register_device(struct clock_event_device *dev) | |||
488 | { | 450 | { |
489 | unsigned long flags; | 451 | unsigned long flags; |
490 | 452 | ||
491 | BUG_ON(clockevents_sanity_check(dev)); | ||
492 | |||
493 | /* Initialize state to DETACHED */ | 453 | /* Initialize state to DETACHED */ |
494 | clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); | 454 | clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); |
495 | 455 | ||
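Note: with the legacy set_mode() path and its sanity check removed above, drivers provide the per-state callbacks directly. A hedged sketch of a registration using only the new callbacks (everything named foo is invented; the field and API names come from the hunks above and the existing clockevents API):

/* Illustrative clock_event_device with per-state callbacks only. */
static int foo_shutdown(struct clock_event_device *evt)     { /* stop hw timer */ return 0; }
static int foo_set_periodic(struct clock_event_device *evt) { /* program periodic mode */ return 0; }
static int foo_set_oneshot(struct clock_event_device *evt)  { /* program oneshot mode */ return 0; }

static struct clock_event_device foo_clockevent = {
	.name			= "foo-timer",
	.features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
	.set_state_shutdown	= foo_shutdown,
	.set_state_periodic	= foo_set_periodic,
	.set_state_oneshot	= foo_set_oneshot,
};

static void __init foo_timer_init(void)
{
	foo_clockevent.cpumask = cpumask_of(smp_processor_id());
	clockevents_config_and_register(&foo_clockevent, 32768, 2, 0x7fffffff);
}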
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index d11c55b6ab7d..4fcd99e12aa0 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -398,7 +398,6 @@ void tick_shutdown(unsigned int cpu) | |||
398 | * the set mode function! | 398 | * the set mode function! |
399 | */ | 399 | */ |
400 | clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); | 400 | clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); |
401 | dev->mode = CLOCK_EVT_MODE_UNUSED; | ||
402 | clockevents_exchange_device(dev, NULL); | 401 | clockevents_exchange_device(dev, NULL); |
403 | dev->event_handler = clockevents_handle_noop; | 402 | dev->event_handler = clockevents_handle_noop; |
404 | td->evtdev = NULL; | 403 | td->evtdev = NULL; |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3319e16f31e5..7c7ec4515983 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -290,16 +290,17 @@ static int __init tick_nohz_full_setup(char *str) | |||
290 | __setup("nohz_full=", tick_nohz_full_setup); | 290 | __setup("nohz_full=", tick_nohz_full_setup); |
291 | 291 | ||
292 | static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, | 292 | static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, |
293 | unsigned long action, | 293 | unsigned long action, |
294 | void *hcpu) | 294 | void *hcpu) |
295 | { | 295 | { |
296 | unsigned int cpu = (unsigned long)hcpu; | 296 | unsigned int cpu = (unsigned long)hcpu; |
297 | 297 | ||
298 | switch (action & ~CPU_TASKS_FROZEN) { | 298 | switch (action & ~CPU_TASKS_FROZEN) { |
299 | case CPU_DOWN_PREPARE: | 299 | case CPU_DOWN_PREPARE: |
300 | /* | 300 | /* |
301 | * If we handle the timekeeping duty for full dynticks CPUs, | 301 | * The boot CPU handles housekeeping duty (unbound timers, |
302 | * we can't safely shutdown that CPU. | 302 | * workqueues, timekeeping, ...) on behalf of full dynticks |
303 | * CPUs. It must remain online when nohz full is enabled. | ||
303 | */ | 304 | */ |
304 | if (tick_nohz_full_running && tick_do_timer_cpu == cpu) | 305 | if (tick_nohz_full_running && tick_do_timer_cpu == cpu) |
305 | return NOTIFY_BAD; | 306 | return NOTIFY_BAD; |
@@ -370,6 +371,12 @@ void __init tick_nohz_init(void) | |||
370 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | 371 | cpu_notifier(tick_nohz_cpu_down_callback, 0); |
371 | pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", | 372 | pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", |
372 | cpumask_pr_args(tick_nohz_full_mask)); | 373 | cpumask_pr_args(tick_nohz_full_mask)); |
374 | |||
375 | /* | ||
376 | * We need at least one CPU to handle housekeeping work such | ||
377 | * as timekeeping, unbound timers, workqueues, ... | ||
378 | */ | ||
379 | WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); | ||
373 | } | 380 | } |
374 | #endif | 381 | #endif |
375 | 382 | ||
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f6ee2e6b6f5d..3739ac6aa473 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, | |||
1614 | negative = (tick_error < 0); | 1614 | negative = (tick_error < 0); |
1615 | 1615 | ||
1616 | /* Sort out the magnitude of the correction */ | 1616 | /* Sort out the magnitude of the correction */ |
1617 | tick_error = abs(tick_error); | 1617 | tick_error = abs64(tick_error); |
1618 | for (adj = 0; tick_error > interval; adj++) | 1618 | for (adj = 0; tick_error > interval; adj++) |
1619 | tick_error >>= 1; | 1619 | tick_error >>= 1; |
1620 | 1620 | ||
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 129c96033e46..f75e35b60149 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -225,7 +225,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) | |||
225 | (unsigned long long) dev->min_delta_ns); | 225 | (unsigned long long) dev->min_delta_ns); |
226 | SEQ_printf(m, " mult: %u\n", dev->mult); | 226 | SEQ_printf(m, " mult: %u\n", dev->mult); |
227 | SEQ_printf(m, " shift: %u\n", dev->shift); | 227 | SEQ_printf(m, " shift: %u\n", dev->shift); |
228 | SEQ_printf(m, " mode: %d\n", dev->mode); | 228 | SEQ_printf(m, " mode: %d\n", clockevent_get_state(dev)); |
229 | SEQ_printf(m, " next_event: %Ld nsecs\n", | 229 | SEQ_printf(m, " next_event: %Ld nsecs\n", |
230 | (unsigned long long) ktime_to_ns(dev->next_event)); | 230 | (unsigned long long) ktime_to_ns(dev->next_event)); |
231 | 231 | ||
@@ -233,40 +233,34 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) | |||
233 | print_name_offset(m, dev->set_next_event); | 233 | print_name_offset(m, dev->set_next_event); |
234 | SEQ_printf(m, "\n"); | 234 | SEQ_printf(m, "\n"); |
235 | 235 | ||
236 | if (dev->set_mode) { | 236 | if (dev->set_state_shutdown) { |
237 | SEQ_printf(m, " set_mode: "); | 237 | SEQ_printf(m, " shutdown: "); |
238 | print_name_offset(m, dev->set_mode); | 238 | print_name_offset(m, dev->set_state_shutdown); |
239 | SEQ_printf(m, "\n"); | 239 | SEQ_printf(m, "\n"); |
240 | } else { | 240 | } |
241 | if (dev->set_state_shutdown) { | ||
242 | SEQ_printf(m, " shutdown: "); | ||
243 | print_name_offset(m, dev->set_state_shutdown); | ||
244 | SEQ_printf(m, "\n"); | ||
245 | } | ||
246 | 241 | ||
247 | if (dev->set_state_periodic) { | 242 | if (dev->set_state_periodic) { |
248 | SEQ_printf(m, " periodic: "); | 243 | SEQ_printf(m, " periodic: "); |
249 | print_name_offset(m, dev->set_state_periodic); | 244 | print_name_offset(m, dev->set_state_periodic); |
250 | SEQ_printf(m, "\n"); | 245 | SEQ_printf(m, "\n"); |
251 | } | 246 | } |
252 | 247 | ||
253 | if (dev->set_state_oneshot) { | 248 | if (dev->set_state_oneshot) { |
254 | SEQ_printf(m, " oneshot: "); | 249 | SEQ_printf(m, " oneshot: "); |
255 | print_name_offset(m, dev->set_state_oneshot); | 250 | print_name_offset(m, dev->set_state_oneshot); |
256 | SEQ_printf(m, "\n"); | 251 | SEQ_printf(m, "\n"); |
257 | } | 252 | } |
258 | 253 | ||
259 | if (dev->set_state_oneshot_stopped) { | 254 | if (dev->set_state_oneshot_stopped) { |
260 | SEQ_printf(m, " oneshot stopped: "); | 255 | SEQ_printf(m, " oneshot stopped: "); |
261 | print_name_offset(m, dev->set_state_oneshot_stopped); | 256 | print_name_offset(m, dev->set_state_oneshot_stopped); |
262 | SEQ_printf(m, "\n"); | 257 | SEQ_printf(m, "\n"); |
263 | } | 258 | } |
264 | 259 | ||
265 | if (dev->tick_resume) { | 260 | if (dev->tick_resume) { |
266 | SEQ_printf(m, " resume: "); | 261 | SEQ_printf(m, " resume: "); |
267 | print_name_offset(m, dev->tick_resume); | 262 | print_name_offset(m, dev->tick_resume); |
268 | SEQ_printf(m, "\n"); | 263 | SEQ_printf(m, "\n"); |
269 | } | ||
270 | } | 264 | } |
271 | 265 | ||
272 | SEQ_printf(m, " event_handler: "); | 266 | SEQ_printf(m, " event_handler: "); |