Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c            | 110
-rw-r--r--  kernel/events/core.c       | 114
-rw-r--r--  kernel/fork.c              |   4
-rw-r--r--  kernel/irq/chip.c          |  33
-rw-r--r--  kernel/irq/handle.c        |   6
-rw-r--r--  kernel/irq/internals.h     |   2
-rw-r--r--  kernel/irq/irqdesc.c       |  20
-rw-r--r--  kernel/irq/irqdomain.c     |   1
-rw-r--r--  kernel/irq/manage.c        |  12
-rw-r--r--  kernel/irq/msi.c           |   6
-rw-r--r--  kernel/irq/proc.c          |  21
-rw-r--r--  kernel/irq/resend.c        |   2
-rw-r--r--  kernel/locking/lockdep.c   |  10
-rw-r--r--  kernel/locking/qspinlock.c |   2
-rw-r--r--  kernel/rcu/tree.c          |   5
-rw-r--r--  kernel/sched/core.c        |  75
-rw-r--r--  kernel/sched/sched.h       |   5
-rw-r--r--  kernel/sched/wait.c        |   7
-rw-r--r--  kernel/time/clockevents.c  |  42
-rw-r--r--  kernel/time/clocksource.c  |   2
-rw-r--r--  kernel/time/tick-common.c  |   1
-rw-r--r--  kernel/time/tick-sched.c   |  15
-rw-r--r--  kernel/time/timekeeping.c  |   4
-rw-r--r--  kernel/time/timer_list.c   |  54
-rw-r--r--  kernel/workqueue.c         |   8
25 files changed, 341 insertions(+), 220 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2cf0f79f1fc9..2c9eae6ad970 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -46,7 +46,6 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/rwsem.h>
-#include <linux/percpu-rwsem.h>
 #include <linux/string.h>
 #include <linux/sort.h>
 #include <linux/kmod.h>
@@ -104,8 +103,6 @@ static DEFINE_SPINLOCK(cgroup_idr_lock);
  */
 static DEFINE_SPINLOCK(release_agent_path_lock);
 
-struct percpu_rw_semaphore cgroup_threadgroup_rwsem;
-
 #define cgroup_assert_mutex_or_rcu_locked()				\
 	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
 			   !lockdep_is_held(&cgroup_mutex),		\
@@ -874,6 +871,48 @@ static struct css_set *find_css_set(struct css_set *old_cset,
 	return cset;
 }
 
+void cgroup_threadgroup_change_begin(struct task_struct *tsk)
+{
+	down_read(&tsk->signal->group_rwsem);
+}
+
+void cgroup_threadgroup_change_end(struct task_struct *tsk)
+{
+	up_read(&tsk->signal->group_rwsem);
+}
+
+/**
+ * threadgroup_lock - lock threadgroup
+ * @tsk: member task of the threadgroup to lock
+ *
+ * Lock the threadgroup @tsk belongs to. No new task is allowed to enter
+ * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
+ * change ->group_leader/pid. This is useful for cases where the threadgroup
+ * needs to stay stable across blockable operations.
+ *
+ * fork and exit explicitly call threadgroup_change_{begin|end}() for
+ * synchronization. While held, no new task will be added to threadgroup
+ * and no existing live task will have its PF_EXITING set.
+ *
+ * de_thread() does threadgroup_change_{begin|end}() when a non-leader
+ * sub-thread becomes a new leader.
+ */
+static void threadgroup_lock(struct task_struct *tsk)
+{
+	down_write(&tsk->signal->group_rwsem);
+}
+
+/**
+ * threadgroup_unlock - unlock threadgroup
+ * @tsk: member task of the threadgroup to unlock
+ *
+ * Reverse threadgroup_lock().
+ */
+static inline void threadgroup_unlock(struct task_struct *tsk)
+{
+	up_write(&tsk->signal->group_rwsem);
+}
+
 static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
 {
 	struct cgroup *root_cgrp = kf_root->kn->priv;
@@ -2074,9 +2113,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp,
 	lockdep_assert_held(&css_set_rwsem);
 
 	/*
-	 * We are synchronized through cgroup_threadgroup_rwsem against
-	 * PF_EXITING setting such that we can't race against cgroup_exit()
-	 * changing the css_set to init_css_set and dropping the old one.
+	 * We are synchronized through threadgroup_lock() against PF_EXITING
+	 * setting such that we can't race against cgroup_exit() changing the
+	 * css_set to init_css_set and dropping the old one.
 	 */
 	WARN_ON_ONCE(tsk->flags & PF_EXITING);
 	old_cset = task_css_set(tsk);
@@ -2133,11 +2172,10 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets)
  * @src_cset and add it to @preloaded_csets, which should later be cleaned
  * up by cgroup_migrate_finish().
  *
- * This function may be called without holding cgroup_threadgroup_rwsem
- * even if the target is a process. Threads may be created and destroyed
- * but as long as cgroup_mutex is not dropped, no new css_set can be put
- * into play and the preloaded css_sets are guaranteed to cover all
- * migrations.
+ * This function may be called without holding threadgroup_lock even if the
+ * target is a process. Threads may be created and destroyed but as long
+ * as cgroup_mutex is not dropped, no new css_set can be put into play and
+ * the preloaded css_sets are guaranteed to cover all migrations.
  */
 static void cgroup_migrate_add_src(struct css_set *src_cset,
 				   struct cgroup *dst_cgrp,
@@ -2240,7 +2278,7 @@ err:
 * @threadgroup: whether @leader points to the whole process or a single task
 *
 * Migrate a process or task denoted by @leader to @cgrp. If migrating a
-* process, the caller must be holding cgroup_threadgroup_rwsem. The
+* process, the caller must be holding threadgroup_lock of @leader. The
 * caller is also responsible for invoking cgroup_migrate_add_src() and
 * cgroup_migrate_prepare_dst() on the targets before invoking this
 * function and following up with cgroup_migrate_finish().
@@ -2368,7 +2406,7 @@ out_release_tset:
 * @leader: the task or the leader of the threadgroup to be attached
 * @threadgroup: attach the whole threadgroup?
 *
-* Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
+* Call holding cgroup_mutex and threadgroup_lock of @leader.
 */
 static int cgroup_attach_task(struct cgroup *dst_cgrp,
 			      struct task_struct *leader, bool threadgroup)
@@ -2460,13 +2498,14 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	if (!cgrp)
 		return -ENODEV;
 
-	percpu_down_write(&cgroup_threadgroup_rwsem);
+retry_find_task:
 	rcu_read_lock();
 	if (pid) {
 		tsk = find_task_by_vpid(pid);
 		if (!tsk) {
+			rcu_read_unlock();
 			ret = -ESRCH;
-			goto out_unlock_rcu;
+			goto out_unlock_cgroup;
 		}
 	} else {
 		tsk = current;
@@ -2482,23 +2521,37 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf,
 	 */
 	if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) {
 		ret = -EINVAL;
-		goto out_unlock_rcu;
+		rcu_read_unlock();
+		goto out_unlock_cgroup;
 	}
 
 	get_task_struct(tsk);
 	rcu_read_unlock();
 
+	threadgroup_lock(tsk);
+	if (threadgroup) {
+		if (!thread_group_leader(tsk)) {
+			/*
+			 * a race with de_thread from another thread's exec()
+			 * may strip us of our leadership, if this happens,
+			 * there is no choice but to throw this task away and
+			 * try again; this is
+			 * "double-double-toil-and-trouble-check locking".
+			 */
+			threadgroup_unlock(tsk);
+			put_task_struct(tsk);
+			goto retry_find_task;
+		}
+	}
+
 	ret = cgroup_procs_write_permission(tsk, cgrp, of);
 	if (!ret)
 		ret = cgroup_attach_task(cgrp, tsk, threadgroup);
 
-	put_task_struct(tsk);
-	goto out_unlock_threadgroup;
+	threadgroup_unlock(tsk);
 
-out_unlock_rcu:
-	rcu_read_unlock();
-out_unlock_threadgroup:
-	percpu_up_write(&cgroup_threadgroup_rwsem);
+	put_task_struct(tsk);
+out_unlock_cgroup:
 	cgroup_kn_unlock(of->kn);
 	return ret ?: nbytes;
 }
@@ -2643,8 +2696,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 
 	lockdep_assert_held(&cgroup_mutex);
 
-	percpu_down_write(&cgroup_threadgroup_rwsem);
-
 	/* look up all csses currently attached to @cgrp's subtree */
 	down_read(&css_set_rwsem);
 	css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) {
@@ -2700,8 +2751,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 			goto out_finish;
 		last_task = task;
 
+		threadgroup_lock(task);
+		/* raced against de_thread() from another thread? */
+		if (!thread_group_leader(task)) {
+			threadgroup_unlock(task);
+			put_task_struct(task);
+			continue;
+		}
+
 		ret = cgroup_migrate(src_cset->dfl_cgrp, task, true);
 
+		threadgroup_unlock(task);
 		put_task_struct(task);
 
 		if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret))
@@ -2711,7 +2771,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp)
 
 out_finish:
 	cgroup_migrate_finish(&preloaded_csets);
-	percpu_up_write(&cgroup_threadgroup_rwsem);
 	return ret;
 }
 
@@ -5024,7 +5083,6 @@ int __init cgroup_init(void)
 	unsigned long key;
 	int ssid, err;
 
-	BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
 	BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f548f69c4299..b11756f9b6dc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event)
 		PERF_EVENT_STATE_INACTIVE;
 }
 
-/*
- * Called at perf_event creation and when events are attached/detached from a
- * group.
- */
-static void perf_event__read_size(struct perf_event *event)
+static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
 {
 	int entry = sizeof(u64); /* value */
 	int size = 0;
@@ -1263,7 +1259,7 @@ static void perf_event__read_size(struct perf_event *event)
 		entry += sizeof(u64);
 
 	if (event->attr.read_format & PERF_FORMAT_GROUP) {
-		nr += event->group_leader->nr_siblings;
+		nr += nr_siblings;
 		size += sizeof(u64);
 	}
 
@@ -1271,14 +1267,11 @@ static void perf_event__read_size(struct perf_event *event)
 	event->read_size = size;
 }
 
-static void perf_event__header_size(struct perf_event *event)
+static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
 {
 	struct perf_sample_data *data;
-	u64 sample_type = event->attr.sample_type;
 	u16 size = 0;
 
-	perf_event__read_size(event);
-
 	if (sample_type & PERF_SAMPLE_IP)
 		size += sizeof(data->ip);
 
@@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event)
 	event->header_size = size;
 }
 
+/*
+ * Called at perf_event creation and when events are attached/detached from a
+ * group.
+ */
+static void perf_event__header_size(struct perf_event *event)
+{
+	__perf_event_read_size(event,
+			       event->group_leader->nr_siblings);
+	__perf_event_header_size(event, event->attr.sample_type);
+}
+
 static void perf_event__id_header_size(struct perf_event *event)
 {
 	struct perf_sample_data *data;
@@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event)
 	event->id_header_size = size;
 }
 
+static bool perf_event_validate_size(struct perf_event *event)
+{
+	/*
+	 * The values computed here will be over-written when we actually
+	 * attach the event.
+	 */
+	__perf_event_read_size(event, event->group_leader->nr_siblings + 1);
+	__perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ);
+	perf_event__id_header_size(event);
+
+	/*
+	 * Sum the lot; should not exceed the 64k limit we have on records.
+	 * Conservative limit to allow for callchains and other variable fields.
+	 */
+	if (event->read_size + event->header_size +
+	    event->id_header_size + sizeof(struct perf_event_header) >= 16*1024)
+		return false;
+
+	return true;
+}
+
 static void perf_group_attach(struct perf_event *event)
 {
 	struct perf_event *group_leader = event->group_leader, *pos;
@@ -8297,13 +8322,35 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	if (move_group) {
 		gctx = group_leader->ctx;
+		mutex_lock_double(&gctx->mutex, &ctx->mutex);
+	} else {
+		mutex_lock(&ctx->mutex);
+	}
 
+	if (!perf_event_validate_size(event)) {
+		err = -E2BIG;
+		goto err_locked;
+	}
+
+	/*
+	 * Must be under the same ctx::mutex as perf_install_in_context(),
+	 * because we need to serialize with concurrent event creation.
+	 */
+	if (!exclusive_event_installable(event, ctx)) {
+		/* exclusive and group stuff are assumed mutually exclusive */
+		WARN_ON_ONCE(move_group);
+
+		err = -EBUSY;
+		goto err_locked;
+	}
+
+	WARN_ON_ONCE(ctx->parent_ctx);
+
+	if (move_group) {
 		/*
 		 * See perf_event_ctx_lock() for comments on the details
 		 * of swizzling perf_event::ctx.
 		 */
-		mutex_lock_double(&gctx->mutex, &ctx->mutex);
-
 		perf_remove_from_context(group_leader, false);
 
 		list_for_each_entry(sibling, &group_leader->sibling_list,
@@ -8311,13 +8358,7 @@ SYSCALL_DEFINE5(perf_event_open,
 			perf_remove_from_context(sibling, false);
 			put_ctx(gctx);
 		}
-	} else {
-		mutex_lock(&ctx->mutex);
-	}
 
-	WARN_ON_ONCE(ctx->parent_ctx);
-
-	if (move_group) {
 		/*
 		 * Wait for everybody to stop referencing the events through
 		 * the old lists, before installing it on new lists.
@@ -8349,22 +8390,29 @@ SYSCALL_DEFINE5(perf_event_open,
 		perf_event__state_init(group_leader);
 		perf_install_in_context(ctx, group_leader, group_leader->cpu);
 		get_ctx(ctx);
-	}
 
-	if (!exclusive_event_installable(event, ctx)) {
-		err = -EBUSY;
-		mutex_unlock(&ctx->mutex);
-		fput(event_file);
-		goto err_context;
+		/*
+		 * Now that all events are installed in @ctx, nothing
+		 * references @gctx anymore, so drop the last reference we have
+		 * on it.
+		 */
+		put_ctx(gctx);
 	}
 
+	/*
+	 * Precalculate sample_data sizes; do while holding ctx::mutex such
+	 * that we're serialized against further additions and before
+	 * perf_install_in_context() which is the point the event is active and
+	 * can use these values.
+	 */
+	perf_event__header_size(event);
+	perf_event__id_header_size(event);
+
 	perf_install_in_context(ctx, event, event->cpu);
 	perf_unpin_context(ctx);
 
-	if (move_group) {
+	if (move_group)
 		mutex_unlock(&gctx->mutex);
-		put_ctx(gctx);
-	}
 	mutex_unlock(&ctx->mutex);
 
 	put_online_cpus();
@@ -8376,12 +8424,6 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_unlock(&current->perf_event_mutex);
 
 	/*
-	 * Precalculate sample_data sizes
-	 */
-	perf_event__header_size(event);
-	perf_event__id_header_size(event);
-
-	/*
 	 * Drop the reference on the group_event after placing the
 	 * new event on the sibling_list. This ensures destruction
 	 * of the group leader will find the pointer to itself in
@@ -8391,6 +8433,12 @@ SYSCALL_DEFINE5(perf_event_open,
 	fd_install(event_fd, event_file);
 	return event_fd;
 
+err_locked:
+	if (move_group)
+		mutex_unlock(&gctx->mutex);
+	mutex_unlock(&ctx->mutex);
+/* err_file: */
+	fput(event_file);
 err_context:
 	perf_unpin_context(ctx);
 	put_ctx(ctx);
diff --git a/kernel/fork.c b/kernel/fork.c
index 7d5f0f118a63..2845623fb582 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1149,6 +1149,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	tty_audit_fork(sig);
 	sched_autogroup_fork(sig);
 
+#ifdef CONFIG_CGROUPS
+	init_rwsem(&sig->group_rwsem);
+#endif
+
 	sig->oom_score_adj = current->signal->oom_score_adj;
 	sig->oom_score_adj_min = current->signal->oom_score_adj_min;
 
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6e40a9539763..e28169dd1c36 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -83,7 +83,7 @@ int irq_set_handler_data(unsigned int irq, void *data)
 
 	if (!desc)
 		return -EINVAL;
-	desc->irq_data.handler_data = data;
+	desc->irq_common_data.handler_data = data;
 	irq_put_desc_unlock(desc, flags);
 	return 0;
 }
@@ -105,7 +105,7 @@ int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset,
 
 	if (!desc)
 		return -EINVAL;
-	desc->irq_data.msi_desc = entry;
+	desc->irq_common_data.msi_desc = entry;
 	if (entry && !irq_offset)
 		entry->irq = irq_base;
 	irq_put_desc_unlock(desc, flags);
@@ -372,7 +372,6 @@ static bool irq_may_run(struct irq_desc *desc)
 
 /**
  * handle_simple_irq - Simple and software-decoded IRQs.
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Simple interrupts are either sent from a demultiplexing interrupt
@@ -382,8 +381,7 @@ static bool irq_may_run(struct irq_desc *desc)
  * Note: The caller is expected to handle the ack, clear, mask and
  * unmask issues if necessary.
  */
-void
-handle_simple_irq(unsigned int irq, struct irq_desc *desc)
+void handle_simple_irq(struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
 
@@ -425,7 +423,6 @@ static void cond_unmask_irq(struct irq_desc *desc)
 
 /**
  * handle_level_irq - Level type irq handler
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Level type interrupts are active as long as the hardware line has
@@ -433,8 +430,7 @@ static void cond_unmask_irq(struct irq_desc *desc)
  * it after the associated handler has acknowledged the device, so the
  * interrupt line is back to inactive.
  */
-void
-handle_level_irq(unsigned int irq, struct irq_desc *desc)
+void handle_level_irq(struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
 	mask_ack_irq(desc);
@@ -496,7 +492,6 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
 
 /**
  * handle_fasteoi_irq - irq handler for transparent controllers
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Only a single callback will be issued to the chip: an ->eoi()
@@ -504,8 +499,7 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip)
  * for modern forms of interrupt handlers, which handle the flow
 * details in hardware, transparently.
  */
-void
-handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
+void handle_fasteoi_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = desc->irq_data.chip;
 
@@ -546,7 +540,6 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq);
 
 /**
  * handle_edge_irq - edge type IRQ handler
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Interrupt occures on the falling and/or rising edge of a hardware
@@ -560,8 +553,7 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq);
  * the handler was running. If all pending interrupts are handled, the
 * loop is left.
  */
-void
-handle_edge_irq(unsigned int irq, struct irq_desc *desc)
+void handle_edge_irq(struct irq_desc *desc)
 {
 	raw_spin_lock(&desc->lock);
 
@@ -618,13 +610,12 @@ EXPORT_SYMBOL(handle_edge_irq);
 #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER
 /**
  * handle_edge_eoi_irq - edge eoi type IRQ handler
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
  *
  * Similar as the above handle_edge_irq, but using eoi and w/o the
 * mask/unmask logic.
  */
-void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc)
+void handle_edge_eoi_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 
@@ -665,13 +656,11 @@ out_eoi:
 
 /**
  * handle_percpu_irq - Per CPU local irq handler
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
 *
 * Per CPU interrupts on SMP machines without locking requirements
  */
-void
-handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
+void handle_percpu_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 
@@ -688,7 +677,6 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 
 /**
  * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids
- * @irq: the interrupt number
  * @desc: the interrupt description structure for this irq
 *
 * Per CPU interrupts on SMP machines without locking requirements. Same as
@@ -698,11 +686,12 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
 * contain the real device id for the cpu on which this handler is
 * called
  */
-void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc)
+void handle_percpu_devid_irq(struct irq_desc *desc)
 {
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	struct irqaction *action = desc->action;
 	void *dev_id = raw_cpu_ptr(action->percpu_dev_id);
+	unsigned int irq = irq_desc_get_irq(desc);
 	irqreturn_t res;
 
 	kstat_incr_irqs_this_cpu(desc);
@@ -796,7 +785,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle,
 		return;
 
 	__irq_do_set_handler(desc, handle, 1, NULL);
-	desc->irq_data.handler_data = data;
+	desc->irq_common_data.handler_data = data;
 
 	irq_put_desc_busunlock(desc, flags);
 }
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index b6eeea8a80c5..e25a83b67cce 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -22,17 +22,19 @@
 
 /**
  * handle_bad_irq - handle spurious and unhandled irqs
- * @irq: the interrupt number
  * @desc: description of the interrupt
 *
 * Handles spurious and unhandled IRQ's. It also prints a debugmessage.
  */
-void handle_bad_irq(unsigned int irq, struct irq_desc *desc)
+void handle_bad_irq(struct irq_desc *desc)
 {
+	unsigned int irq = irq_desc_get_irq(desc);
+
 	print_irq_desc(irq, desc);
 	kstat_incr_irqs_this_cpu(desc);
 	ack_bad_irq(irq);
 }
+EXPORT_SYMBOL_GPL(handle_bad_irq);
 
 /*
 * Special, empty irq handler:
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index eee4b385cffb..5ef0c2dbe930 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -194,7 +194,7 @@ static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
 
 static inline int irq_desc_get_node(struct irq_desc *desc)
 {
-	return irq_data_get_node(&desc->irq_data);
+	return irq_common_data_get_node(&desc->irq_common_data);
 }
 
 #ifdef CONFIG_PM_SLEEP
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 0a2a4b697bcb..239e2ae2c947 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -38,12 +38,13 @@ static void __init init_irq_default_affinity(void)
 #ifdef CONFIG_SMP
 static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
 {
-	if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node))
+	if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity,
+				     gfp, node))
 		return -ENOMEM;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) {
-		free_cpumask_var(desc->irq_data.affinity);
+		free_cpumask_var(desc->irq_common_data.affinity);
 		return -ENOMEM;
 	}
 #endif
@@ -52,11 +53,13 @@ static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node)
 
 static void desc_smp_init(struct irq_desc *desc, int node)
 {
-	desc->irq_data.node = node;
-	cpumask_copy(desc->irq_data.affinity, irq_default_affinity);
+	cpumask_copy(desc->irq_common_data.affinity, irq_default_affinity);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_clear(desc->pending_mask);
 #endif
+#ifdef CONFIG_NUMA
+	desc->irq_common_data.node = node;
+#endif
 }
 
 #else
@@ -70,12 +73,13 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node,
 {
 	int cpu;
 
+	desc->irq_common_data.handler_data = NULL;
+	desc->irq_common_data.msi_desc = NULL;
+
 	desc->irq_data.common = &desc->irq_common_data;
 	desc->irq_data.irq = irq;
 	desc->irq_data.chip = &no_irq_chip;
 	desc->irq_data.chip_data = NULL;
-	desc->irq_data.handler_data = NULL;
-	desc->irq_data.msi_desc = NULL;
 	irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
 	irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
 	desc->handle_irq = handle_bad_irq;
@@ -121,7 +125,7 @@ static void free_masks(struct irq_desc *desc)
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	free_cpumask_var(desc->pending_mask);
 #endif
-	free_cpumask_var(desc->irq_data.affinity);
+	free_cpumask_var(desc->irq_common_data.affinity);
 }
 #else
 static inline void free_masks(struct irq_desc *desc) { }
@@ -343,7 +347,7 @@ int generic_handle_irq(unsigned int irq)
 
 	if (!desc)
 		return -EINVAL;
-	generic_handle_irq_desc(irq, desc);
+	generic_handle_irq_desc(desc);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(generic_handle_irq);
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 79baaf8a7813..dc9d27c0c158 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -844,7 +844,6 @@ static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain,
 		child->parent_data = irq_data;
 		irq_data->irq = child->irq;
 		irq_data->common = child->common;
-		irq_data->node = child->node;
 		irq_data->domain = domain;
 	}
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ad1b064f94fe..f9a59f6cabd2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -192,7 +192,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	switch (ret) {
 	case IRQ_SET_MASK_OK:
 	case IRQ_SET_MASK_OK_DONE:
-		cpumask_copy(data->affinity, mask);
+		cpumask_copy(desc->irq_common_data.affinity, mask);
 	case IRQ_SET_MASK_OK_NOCOPY:
 		irq_set_thread_affinity(desc);
 		ret = 0;
@@ -304,7 +304,7 @@ static void irq_affinity_notify(struct work_struct *work)
 	if (irq_move_pending(&desc->irq_data))
 		irq_get_pending(cpumask, desc);
 	else
-		cpumask_copy(cpumask, desc->irq_data.affinity);
+		cpumask_copy(cpumask, desc->irq_common_data.affinity);
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	notify->notify(notify, cpumask);
@@ -375,9 +375,9 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask)
 	 * one of the targets is online.
 	 */
 	if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) {
-		if (cpumask_intersects(desc->irq_data.affinity,
+		if (cpumask_intersects(desc->irq_common_data.affinity,
				       cpu_online_mask))
-			set = desc->irq_data.affinity;
+			set = desc->irq_common_data.affinity;
 		else
 			irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
 	}
@@ -829,8 +829,8 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action)
 	 * This code is triggered unconditionally. Check the affinity
 	 * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out.
 	 */
-	if (desc->irq_data.affinity)
-		cpumask_copy(mask, desc->irq_data.affinity);
+	if (desc->irq_common_data.affinity)
+		cpumask_copy(mask, desc->irq_common_data.affinity);
 	else
 		valid = false;
 	raw_spin_unlock_irq(&desc->lock);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 7e6512b9dc1f..be9149f62eb8 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -228,11 +228,7 @@ static void msi_domain_update_chip_ops(struct msi_domain_info *info)
 {
 	struct irq_chip *chip = info->chip;
 
-	BUG_ON(!chip);
-	if (!chip->irq_mask)
-		chip->irq_mask = pci_msi_mask_irq;
-	if (!chip->irq_unmask)
-		chip->irq_unmask = pci_msi_unmask_irq;
+	BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask);
 	if (!chip->irq_set_affinity)
 		chip->irq_set_affinity = msi_domain_set_affinity;
 }
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0e97c142ce40..a50ddc9417ff 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -12,6 +12,7 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/mutex.h>
 
 #include "internals.h"
 
@@ -39,7 +40,7 @@ static struct proc_dir_entry *root_irq_dir;
 static int show_irq_affinity(int type, struct seq_file *m, void *v)
 {
 	struct irq_desc *desc = irq_to_desc((long)m->private);
-	const struct cpumask *mask = desc->irq_data.affinity;
+	const struct cpumask *mask = desc->irq_common_data.affinity;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	if (irqd_is_setaffinity_pending(&desc->irq_data))
@@ -323,18 +324,29 @@ void register_handler_proc(unsigned int irq, struct irqaction *action)
 
 void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 {
+	static DEFINE_MUTEX(register_lock);
 	char name [MAX_NAMELEN];
 
-	if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir)
+	if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip))
 		return;
 
+	/*
+	 * irq directories are registered only when a handler is
+	 * added, not when the descriptor is created, so multiple
+	 * tasks might try to register at the same time.
+	 */
+	mutex_lock(&register_lock);
+
+	if (desc->dir)
+		goto out_unlock;
+
 	memset(name, 0, MAX_NAMELEN);
 	sprintf(name, "%d", irq);
 
 	/* create /proc/irq/1234 */
 	desc->dir = proc_mkdir(name, root_irq_dir);
 	if (!desc->dir)
-		return;
+		goto out_unlock;
 
 #ifdef CONFIG_SMP
 	/* create /proc/irq/<irq>/smp_affinity */
@@ -355,6 +367,9 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 
 	proc_create_data("spurious", 0444, desc->dir,
 			 &irq_spurious_proc_fops, (void *)(long)irq);
+
+out_unlock:
+	mutex_unlock(&register_lock);
 }
 
 void unregister_irq_proc(unsigned int irq, struct irq_desc *desc)
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index dd95f44f99b2..b86886beee4f 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -38,7 +38,7 @@ static void resend_irqs(unsigned long arg)
 		clear_bit(irq, irqs_resend);
 		desc = irq_to_desc(irq);
 		local_irq_disable();
-		desc->handle_irq(irq, desc);
+		desc->handle_irq(desc);
 		local_irq_enable();
 	}
 }
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 8acfbf773e06..4e49cc4c9952 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3068,7 +3068,7 @@ static int __lock_is_held(struct lockdep_map *lock);
 static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 			  int trylock, int read, int check, int hardirqs_off,
 			  struct lockdep_map *nest_lock, unsigned long ip,
-			  int references)
+			  int references, int pin_count)
 {
 	struct task_struct *curr = current;
 	struct lock_class *class = NULL;
@@ -3157,7 +3157,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	hlock->waittime_stamp = 0;
 	hlock->holdtime_stamp = lockstat_clock();
 #endif
-	hlock->pin_count = 0;
+	hlock->pin_count = pin_count;
 
 	if (check && !mark_irqflags(curr, hlock))
 		return 0;
@@ -3343,7 +3343,7 @@ found_it:
 				hlock_class(hlock)->subclass, hlock->trylock,
 				hlock->read, hlock->check, hlock->hardirqs_off,
 				hlock->nest_lock, hlock->acquire_ip,
-				hlock->references))
+				hlock->references, hlock->pin_count))
 			return 0;
 	}
 
@@ -3433,7 +3433,7 @@ found_it:
 				hlock_class(hlock)->subclass, hlock->trylock,
 				hlock->read, hlock->check, hlock->hardirqs_off,
 				hlock->nest_lock, hlock->acquire_ip,
-				hlock->references))
+				hlock->references, hlock->pin_count))
 			return 0;
 	}
 
@@ -3583,7 +3583,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 	current->lockdep_recursion = 1;
 	trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip);
 	__lock_acquire(lock, subclass, trylock, read, check,
-		       irqs_disabled_flags(flags), nest_lock, ip, 0);
+		       irqs_disabled_flags(flags), nest_lock, ip, 0, 0);
 	current->lockdep_recursion = 0;
 	raw_local_irq_restore(flags);
 }
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 337c8818541d..87e9ce6a63c5 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -289,7 +289,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	if (pv_enabled())
 		goto queue;
 
-	if (virt_queued_spin_lock(lock))
+	if (virt_spin_lock(lock))
 		return;
 
 	/*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9f75f25cc5d9..775d36cc0050 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3868,6 +3868,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
 static void __init
 rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 {
+	static struct lock_class_key rcu_exp_sched_rdp_class;
 	unsigned long flags;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 	struct rcu_node *rnp = rcu_get_root(rsp);
@@ -3883,6 +3884,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	mutex_init(&rdp->exp_funnel_mutex);
 	rcu_boot_init_nocb_percpu_data(rdp);
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	if (rsp == &rcu_sched_state)
+		lockdep_set_class_and_name(&rdp->exp_funnel_mutex,
+					   &rcu_exp_sched_rdp_class,
+					   "rcu_data_exp_sched");
 }
 
 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3595403921bd..10a8faa1b0d4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -621,18 +621,21 @@ int get_nohz_timer_target(void)
 	int i, cpu = smp_processor_id();
 	struct sched_domain *sd;
 
-	if (!idle_cpu(cpu))
+	if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu))
 		return cpu;
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		for_each_cpu(i, sched_domain_span(sd)) {
-			if (!idle_cpu(i)) {
+			if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) {
 				cpu = i;
 				goto unlock;
 			}
 		}
 	}
+
+	if (!is_housekeeping_cpu(cpu))
+		cpu = housekeeping_any_cpu();
 unlock:
 	rcu_read_unlock();
 	return cpu;
@@ -2514,11 +2517,11 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	 * If a task dies, then it sets TASK_DEAD in tsk->state and calls
 	 * schedule one last time. The schedule call will never return, and
 	 * the scheduled task must drop that reference.
-	 * The test for TASK_DEAD must occur while the runqueue locks are
-	 * still held, otherwise prev could be scheduled on another cpu, die
-	 * there before we look at prev->state, and then the reference would
-	 * be dropped twice.
-	 *	 Manfred Spraul <manfred@colorfullife.com>
+	 *
+	 * We must observe prev->state before clearing prev->on_cpu (in
+	 * finish_lock_switch), otherwise a concurrent wakeup can get prev
+	 * running on another CPU and we could rave with its RUNNING -> DEAD
+	 * transition, resulting in a double drop.
 	 */
 	prev_state = prev->state;
 	vtime_task_switch(prev);
@@ -2666,13 +2669,20 @@ unsigned long nr_running(void)
 
 /*
 * Check if only the current task is running on the cpu.
+ *
+ * Caution: this function does not check that the caller has disabled
+ * preemption, thus the result might have a time-of-check-to-time-of-use
+ * race. The caller is responsible to use it correctly, for example:
+ *
+ * - from a non-preemptable section (of course)
+ *
+ * - from a thread that is bound to a single CPU
+ *
+ * - in a loop with very short iterations (e.g. a polling loop)
 */
 bool single_task_running(void)
 {
-	if (cpu_rq(smp_processor_id())->nr_running == 1)
-		return true;
-	else
-		return false;
+	return raw_rq()->nr_running == 1;
 }
 EXPORT_SYMBOL(single_task_running);
 
@@ -4924,7 +4934,15 @@ void init_idle(struct task_struct *idle, int cpu)
 	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
 
-	do_set_cpus_allowed(idle, cpumask_of(cpu));
+#ifdef CONFIG_SMP
+	/*
+	 * Its possible that init_idle() gets called multiple times on a task,
+	 * in that case do_set_cpus_allowed() will not do the right thing.
+	 *
+	 * And since this is boot we can forgo the serialization.
+	 */
+	set_cpus_allowed_common(idle, cpumask_of(cpu));
+#endif
 	/*
 	 * We're having a chicken and egg problem, even though we are
 	 * holding rq->lock, the cpu isn't yet set to this cpu so the
@@ -4941,7 +4959,7 @@ void init_idle(struct task_struct *idle, int cpu)
 
 	rq->curr = rq->idle = idle;
 	idle->on_rq = TASK_ON_RQ_QUEUED;
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
 	idle->on_cpu = 1;
 #endif
 	raw_spin_unlock(&rq->lock);
@@ -4956,7 +4974,7 @@ void init_idle(struct task_struct *idle, int cpu)
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
 	vtime_init_idle(idle, cpu);
-#if defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
 }
@@ -5178,24 +5196,47 @@ static void migrate_tasks(struct rq *dead_rq)
 			break;
 
 		/*
-		 * Ensure rq->lock covers the entire task selection
-		 * until the migration.
+		 * pick_next_task assumes pinned rq->lock.
 		 */
 		lockdep_pin_lock(&rq->lock);
 		next = pick_next_task(rq, &fake_task);
 		BUG_ON(!next);
 		next->sched_class->put_prev_task(rq, next);
 
+		/*
+		 * Rules for changing task_struct::cpus_allowed are holding
+		 * both pi_lock and rq->lock, such that holding either
+		 * stabilizes the mask.
+		 *
+		 * Drop rq->lock is not quite as disastrous as it usually is
+		 * because !cpu_active at this point, which means load-balance
+		 * will not interfere. Also, stop-machine.
+		 */
+		lockdep_unpin_lock(&rq->lock);
+		raw_spin_unlock(&rq->lock);
+		raw_spin_lock(&next->pi_lock);
+		raw_spin_lock(&rq->lock);
+
+		/*
+		 * Since we're inside stop-machine, _nothing_ should have
+		 * changed the task, WARN if weird stuff happened, because in
+		 * that case the above rq->lock drop is a fail too.
+		 */
+		if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) {
+			raw_spin_unlock(&next->pi_lock);
+			continue;
+		}
+
 		/* Find suitable destination for @next, with force if needed. */
 		dest_cpu = select_fallback_rq(dead_rq->cpu, next);
 
-		lockdep_unpin_lock(&rq->lock);
 		rq = __migrate_task(rq, next, dest_cpu);
 		if (rq != dead_rq) {
 			raw_spin_unlock(&rq->lock);
 			rq = dead_rq;
 			raw_spin_lock(&rq->lock);
 		}
+		raw_spin_unlock(&next->pi_lock);
 	}
 
 	rq->stop = stop;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 68cda117574c..6d2a119c7ad9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1078,9 +1078,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
 	 * We must ensure this doesn't happen until the switch is completely
 	 * finished.
+	 *
+	 * Pairs with the control dependency and rmb in try_to_wake_up().
 	 */
-	smp_wmb();
-	prev->on_cpu = 0;
+	smp_store_release(&prev->on_cpu, 0);
 #endif
 #ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 272d9322bc5d..052e02672d12 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -106,10 +106,9 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr)
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked);
 
-void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr,
-			  void *key)
+void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
 {
-	__wake_up_common(q, mode, nr, 0, key);
+	__wake_up_common(q, mode, 1, 0, key);
 }
 EXPORT_SYMBOL_GPL(__wake_up_locked_key);
 
@@ -284,7 +283,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
 	if (!list_empty(&wait->task_list))
 		list_del_init(&wait->task_list);
 	else if (waitqueue_active(q))
-		__wake_up_locked_key(q, mode, 1, key);
+		__wake_up_locked_key(q, mode, key);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(abort_exclusive_wait);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 50eb107f1198..a9b76a40319e 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -97,20 +97,6 @@ EXPORT_SYMBOL_GPL(clockevent_delta2ns);
97static int __clockevents_switch_state(struct clock_event_device *dev, 97static int __clockevents_switch_state(struct clock_event_device *dev,
98 enum clock_event_state state) 98 enum clock_event_state state)
99{ 99{
100 /* Transition with legacy set_mode() callback */
101 if (dev->set_mode) {
102 /* Legacy callback doesn't support new modes */
103 if (state > CLOCK_EVT_STATE_ONESHOT)
104 return -ENOSYS;
105 /*
106 * 'clock_event_state' and 'clock_event_mode' have 1-to-1
107 * mapping until *_ONESHOT, and so a simple cast will work.
108 */
109 dev->set_mode((enum clock_event_mode)state, dev);
110 dev->mode = (enum clock_event_mode)state;
111 return 0;
112 }
113
114 if (dev->features & CLOCK_EVT_FEAT_DUMMY) 100 if (dev->features & CLOCK_EVT_FEAT_DUMMY)
115 return 0; 101 return 0;
116 102
@@ -204,12 +190,8 @@ int clockevents_tick_resume(struct clock_event_device *dev)
204{ 190{
205 int ret = 0; 191 int ret = 0;
206 192
207 if (dev->set_mode) { 193 if (dev->tick_resume)
208 dev->set_mode(CLOCK_EVT_MODE_RESUME, dev);
209 dev->mode = CLOCK_EVT_MODE_RESUME;
210 } else if (dev->tick_resume) {
211 ret = dev->tick_resume(dev); 194 ret = dev->tick_resume(dev);
212 }
213 195
214 return ret; 196 return ret;
215} 197}
@@ -460,26 +442,6 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
460} 442}
461EXPORT_SYMBOL_GPL(clockevents_unbind_device); 443EXPORT_SYMBOL_GPL(clockevents_unbind_device);
462 444
463/* Sanity check of state transition callbacks */
464static int clockevents_sanity_check(struct clock_event_device *dev)
465{
466 /* Legacy set_mode() callback */
467 if (dev->set_mode) {
468 /* We shouldn't be supporting new modes now */
469 WARN_ON(dev->set_state_periodic || dev->set_state_oneshot ||
470 dev->set_state_shutdown || dev->tick_resume ||
471 dev->set_state_oneshot_stopped);
472
473 BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
474 return 0;
475 }
476
477 if (dev->features & CLOCK_EVT_FEAT_DUMMY)
478 return 0;
479
480 return 0;
481}
482
483/** 445/**
484 * clockevents_register_device - register a clock event device 446 * clockevents_register_device - register a clock event device
485 * @dev: device to register 447 * @dev: device to register
@@ -488,8 +450,6 @@ void clockevents_register_device(struct clock_event_device *dev)
488{ 450{
489 unsigned long flags; 451 unsigned long flags;
490 452
491 BUG_ON(clockevents_sanity_check(dev));
492
493 /* Initialize state to DETACHED */ 453 /* Initialize state to DETACHED */
494 clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); 454 clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
495 455
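With the legacy set_mode() path and its sanity check removed, every clock event driver is expected to provide the per-state callbacks. A hedged driver-side sketch of the post-conversion shape (the foo_* names are hypothetical; the struct fields and clockevents_config_and_register() are real):

	#include <linux/clockchips.h>

	static int foo_shutdown(struct clock_event_device *evt)
	{
		/* stop the hardware timer */
		return 0;
	}

	static int foo_set_periodic(struct clock_event_device *evt)
	{
		/* program a fixed reload value for HZ ticks */
		return 0;
	}

	static int foo_set_oneshot(struct clock_event_device *evt)
	{
		/* switch the hardware to one-shot mode */
		return 0;
	}

	static int foo_next_event(unsigned long delta, struct clock_event_device *evt)
	{
		/* arm the hardware to fire after 'delta' cycles */
		return 0;
	}

	static struct clock_event_device foo_clockevent = {
		.name			= "foo",
		.features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
		.set_state_shutdown	= foo_shutdown,
		.set_state_periodic	= foo_set_periodic,
		.set_state_oneshot	= foo_set_oneshot,
		.set_next_event		= foo_next_event,
		/* no .set_mode: mixing it with the callbacks above is exactly
		 * what the removed clockevents_sanity_check() used to reject */
	};

	/* e.g. clockevents_config_and_register(&foo_clockevent, rate, 0xf, 0x7fffffff); */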
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 841b72f720e8..3a38775b50c2 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -217,7 +217,7 @@ static void clocksource_watchdog(unsigned long data)
217 continue; 217 continue;
218 218
219 /* Check the deviation from the watchdog clocksource. */ 219 /* Check the deviation from the watchdog clocksource. */
220 if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { 220 if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
221 pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n", 221 pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n",
222 cs->name); 222 cs->name);
223 pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", 223 pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
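The abs() -> abs64() switch matters because cs_nsec - wd_nsec is a signed 64-bit delta and the abs() macro of this era truncated it through an int on 32-bit builds, so a large skew could wrap into a small value and slip under WATCHDOG_THRESHOLD; the same conversion appears again in timekeeping_freqadjust() further down. A userspace sketch of the failure mode (illustrative; these are not the kernel's macro definitions):

	#include <stdint.h>
	#include <stdio.h>

	/* roughly what a 32-bit abs() did to a 64-bit argument: truncate first */
	static int32_t abs_truncating(int64_t v) { int32_t x = (int32_t)v; return x < 0 ? -x : x; }
	static int64_t abs_64(int64_t v)         { return v < 0 ? -v : v; }

	int main(void)
	{
		int64_t skew = 4300000000LL;	/* 4.3 s of skew, in nanoseconds */

		/* truncated: ~5 ms, which would sneak under a ~62 ms watchdog
		 * threshold; 64-bit: the real 4.3 s, correctly flagged as unstable */
		printf("%d vs %lld\n", abs_truncating(skew), (long long)abs_64(skew));
		return 0;
	}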
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index d11c55b6ab7d..4fcd99e12aa0 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -398,7 +398,6 @@ void tick_shutdown(unsigned int cpu)
398 * the set mode function! 398 * the set mode function!
399 */ 399 */
400 clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); 400 clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
401 dev->mode = CLOCK_EVT_MODE_UNUSED;
402 clockevents_exchange_device(dev, NULL); 401 clockevents_exchange_device(dev, NULL);
403 dev->event_handler = clockevents_handle_noop; 402 dev->event_handler = clockevents_handle_noop;
404 td->evtdev = NULL; 403 td->evtdev = NULL;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3319e16f31e5..7c7ec4515983 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -290,16 +290,17 @@ static int __init tick_nohz_full_setup(char *str)
290__setup("nohz_full=", tick_nohz_full_setup); 290__setup("nohz_full=", tick_nohz_full_setup);
291 291
292static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, 292static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
293 unsigned long action, 293 unsigned long action,
294 void *hcpu) 294 void *hcpu)
295{ 295{
296 unsigned int cpu = (unsigned long)hcpu; 296 unsigned int cpu = (unsigned long)hcpu;
297 297
298 switch (action & ~CPU_TASKS_FROZEN) { 298 switch (action & ~CPU_TASKS_FROZEN) {
299 case CPU_DOWN_PREPARE: 299 case CPU_DOWN_PREPARE:
300 /* 300 /*
301 * If we handle the timekeeping duty for full dynticks CPUs, 301 * The boot CPU handles housekeeping duty (unbound timers,
302 * we can't safely shutdown that CPU. 302 * workqueues, timekeeping, ...) on behalf of full dynticks
303 * CPUs. It must remain online when nohz full is enabled.
303 */ 304 */
304 if (tick_nohz_full_running && tick_do_timer_cpu == cpu) 305 if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
305 return NOTIFY_BAD; 306 return NOTIFY_BAD;
@@ -370,6 +371,12 @@ void __init tick_nohz_init(void)
370 cpu_notifier(tick_nohz_cpu_down_callback, 0); 371 cpu_notifier(tick_nohz_cpu_down_callback, 0);
371 pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", 372 pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n",
372 cpumask_pr_args(tick_nohz_full_mask)); 373 cpumask_pr_args(tick_nohz_full_mask));
374
375 /*
376 * We need at least one CPU to handle housekeeping work such
377 * as timekeeping, unbound timers, workqueues, ...
378 */
379 WARN_ON_ONCE(cpumask_empty(housekeeping_mask));
373} 380}
374#endif 381#endif
375 382
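The WARN_ON_ONCE() added above encodes the rule that nohz_full must never cover every CPU: at least one housekeeping CPU has to stay behind for timekeeping, unbound timers and workqueues, which is also why the CPU-down callback refuses to offline the boot CPU. Under that assumption, booting an 8-CPU box with nohz_full=1-7 leaves CPU 0 as the housekeeper. A sketch of the relationship (the helper name is made up; the cpumask API is real):

	#include <linux/cpumask.h>
	#include <linux/kernel.h>

	/* housekeeping = possible CPUs that are not in the nohz_full set */
	static void derive_housekeeping(struct cpumask *housekeeping,
					const struct cpumask *nohz_full)
	{
		cpumask_andnot(housekeeping, cpu_possible_mask, nohz_full);
		WARN_ON_ONCE(cpumask_empty(housekeeping));	/* must never be empty */
	}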
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f6ee2e6b6f5d..44d2cc0436f4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1251,7 +1251,7 @@ void __init timekeeping_init(void)
1251 set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec); 1251 set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec);
1252 tk_set_wall_to_mono(tk, tmp); 1252 tk_set_wall_to_mono(tk, tmp);
1253 1253
1254 timekeeping_update(tk, TK_MIRROR); 1254 timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
1255 1255
1256 write_seqcount_end(&tk_core.seq); 1256 write_seqcount_end(&tk_core.seq);
1257 raw_spin_unlock_irqrestore(&timekeeper_lock, flags); 1257 raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
@@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk,
1614 negative = (tick_error < 0); 1614 negative = (tick_error < 0);
1615 1615
1616 /* Sort out the magnitude of the correction */ 1616 /* Sort out the magnitude of the correction */
1617 tick_error = abs(tick_error); 1617 tick_error = abs64(tick_error);
1618 for (adj = 0; tick_error > interval; adj++) 1618 for (adj = 0; tick_error > interval; adj++)
1619 tick_error >>= 1; 1619 tick_error >>= 1;
1620 1620
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 129c96033e46..f75e35b60149 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -225,7 +225,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
225 (unsigned long long) dev->min_delta_ns); 225 (unsigned long long) dev->min_delta_ns);
226 SEQ_printf(m, " mult: %u\n", dev->mult); 226 SEQ_printf(m, " mult: %u\n", dev->mult);
227 SEQ_printf(m, " shift: %u\n", dev->shift); 227 SEQ_printf(m, " shift: %u\n", dev->shift);
228 SEQ_printf(m, " mode: %d\n", dev->mode); 228 SEQ_printf(m, " mode: %d\n", clockevent_get_state(dev));
229 SEQ_printf(m, " next_event: %Ld nsecs\n", 229 SEQ_printf(m, " next_event: %Ld nsecs\n",
230 (unsigned long long) ktime_to_ns(dev->next_event)); 230 (unsigned long long) ktime_to_ns(dev->next_event));
231 231
@@ -233,40 +233,34 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
233 print_name_offset(m, dev->set_next_event); 233 print_name_offset(m, dev->set_next_event);
234 SEQ_printf(m, "\n"); 234 SEQ_printf(m, "\n");
235 235
236 if (dev->set_mode) { 236 if (dev->set_state_shutdown) {
237 SEQ_printf(m, " set_mode: "); 237 SEQ_printf(m, " shutdown: ");
238 print_name_offset(m, dev->set_mode); 238 print_name_offset(m, dev->set_state_shutdown);
239 SEQ_printf(m, "\n"); 239 SEQ_printf(m, "\n");
240 } else { 240 }
241 if (dev->set_state_shutdown) {
242 SEQ_printf(m, " shutdown: ");
243 print_name_offset(m, dev->set_state_shutdown);
244 SEQ_printf(m, "\n");
245 }
246 241
247 if (dev->set_state_periodic) { 242 if (dev->set_state_periodic) {
248 SEQ_printf(m, " periodic: "); 243 SEQ_printf(m, " periodic: ");
249 print_name_offset(m, dev->set_state_periodic); 244 print_name_offset(m, dev->set_state_periodic);
250 SEQ_printf(m, "\n"); 245 SEQ_printf(m, "\n");
251 } 246 }
252 247
253 if (dev->set_state_oneshot) { 248 if (dev->set_state_oneshot) {
254 SEQ_printf(m, " oneshot: "); 249 SEQ_printf(m, " oneshot: ");
255 print_name_offset(m, dev->set_state_oneshot); 250 print_name_offset(m, dev->set_state_oneshot);
256 SEQ_printf(m, "\n"); 251 SEQ_printf(m, "\n");
257 } 252 }
258 253
259 if (dev->set_state_oneshot_stopped) { 254 if (dev->set_state_oneshot_stopped) {
260 SEQ_printf(m, " oneshot stopped: "); 255 SEQ_printf(m, " oneshot stopped: ");
261 print_name_offset(m, dev->set_state_oneshot_stopped); 256 print_name_offset(m, dev->set_state_oneshot_stopped);
262 SEQ_printf(m, "\n"); 257 SEQ_printf(m, "\n");
263 } 258 }
264 259
265 if (dev->tick_resume) { 260 if (dev->tick_resume) {
266 SEQ_printf(m, " resume: "); 261 SEQ_printf(m, " resume: ");
267 print_name_offset(m, dev->tick_resume); 262 print_name_offset(m, dev->tick_resume);
268 SEQ_printf(m, "\n"); 263 SEQ_printf(m, "\n");
269 }
270 } 264 }
271 265
272 SEQ_printf(m, " event_handler: "); 266 SEQ_printf(m, " event_handler: ");
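Since dev->mode is on its way out, the "mode:" line now prints clockevent_get_state(); for readers of /proc/timer_list, the numeric value is the clock_event_state enumeration, roughly as below (a reference sketch, worth checking against include/linux/clockchips.h of this kernel), so e.g. "mode: 3" means one-shot:

	enum clock_event_state {
		CLOCK_EVT_STATE_DETACHED,		/* 0: not used by the tick code   */
		CLOCK_EVT_STATE_SHUTDOWN,		/* 1: stopped                     */
		CLOCK_EVT_STATE_PERIODIC,		/* 2: periodic tick               */
		CLOCK_EVT_STATE_ONESHOT,		/* 3: one-shot / highres mode     */
		CLOCK_EVT_STATE_ONESHOT_STOPPED,	/* 4: one-shot, currently stopped */
	};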
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ca71582fcfab..bcb14cafe007 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1458,13 +1458,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
1458 timer_stats_timer_set_start_info(&dwork->timer); 1458 timer_stats_timer_set_start_info(&dwork->timer);
1459 1459
1460 dwork->wq = wq; 1460 dwork->wq = wq;
1461	/* timer isn't guaranteed to run on this CPU, record it earlier */
1462 if (cpu == WORK_CPU_UNBOUND)
1463 cpu = raw_smp_processor_id();
1461 dwork->cpu = cpu; 1464 dwork->cpu = cpu;
1462 timer->expires = jiffies + delay; 1465 timer->expires = jiffies + delay;
1463 1466
1464 if (unlikely(cpu != WORK_CPU_UNBOUND)) 1467 add_timer_on(timer, cpu);
1465 add_timer_on(timer, cpu);
1466 else
1467 add_timer(timer);
1468} 1468}
1469 1469
1470/** 1470/**
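The hunk above resolves WORK_CPU_UNBOUND to the submitting CPU before arming the timer, so dwork->cpu always names a real CPU and add_timer_on() keeps the timer (and therefore the later queueing of the work) on that CPU. A hedged caller-side sketch (the foo_* names are made up; the workqueue API is the kernel's):

	#include <linux/workqueue.h>
	#include <linux/jiffies.h>

	static void foo_work_fn(struct work_struct *work)
	{
		/* with the change above, an unbound delayed work's timer fires on
		 * the CPU that queued it, so the work is queued from there too */
	}

	static DECLARE_DELAYED_WORK(foo_dwork, foo_work_fn);

	static void foo_kick(void)
	{
		/* the caller does not pick a CPU; __queue_delayed_work() now records
		 * the local CPU instead of leaving dwork->cpu as WORK_CPU_UNBOUND */
		queue_delayed_work(system_wq, &foo_dwork, msecs_to_jiffies(100));
	}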