Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup.c            | 110
-rw-r--r--  kernel/events/core.c       | 114
-rw-r--r--  kernel/fork.c              |   4
-rw-r--r--  kernel/irq/chip.c          |  33
-rw-r--r--  kernel/irq/handle.c        |   6
-rw-r--r--  kernel/irq/internals.h     |   2
-rw-r--r--  kernel/irq/irqdesc.c       |  20
-rw-r--r--  kernel/irq/irqdomain.c     |   1
-rw-r--r--  kernel/irq/manage.c        |  12
-rw-r--r--  kernel/irq/msi.c           |   6
-rw-r--r--  kernel/irq/proc.c          |  21
-rw-r--r--  kernel/irq/resend.c        |   2
-rw-r--r--  kernel/locking/lockdep.c   |  10
-rw-r--r--  kernel/locking/qspinlock.c |   2
-rw-r--r--  kernel/rcu/tree.c          |   5
-rw-r--r--  kernel/sched/core.c        |  75
-rw-r--r--  kernel/sched/sched.h       |   5
-rw-r--r--  kernel/sched/wait.c        |   7
-rw-r--r--  kernel/time/clockevents.c  |  42
-rw-r--r--  kernel/time/clocksource.c  |   2
-rw-r--r--  kernel/time/tick-common.c  |   1
-rw-r--r--  kernel/time/tick-sched.c   |  15
-rw-r--r--  kernel/time/timekeeping.c  |   4
-rw-r--r--  kernel/time/timer_list.c   |  54
-rw-r--r--  kernel/workqueue.c         |   8
25 files changed, 341 insertions(+), 220 deletions(-)
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2cf0f79f1fc9..2c9eae6ad970 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -46,7 +46,6 @@ | |||
46 | #include <linux/slab.h> | 46 | #include <linux/slab.h> |
47 | #include <linux/spinlock.h> | 47 | #include <linux/spinlock.h> |
48 | #include <linux/rwsem.h> | 48 | #include <linux/rwsem.h> |
49 | #include <linux/percpu-rwsem.h> | ||
50 | #include <linux/string.h> | 49 | #include <linux/string.h> |
51 | #include <linux/sort.h> | 50 | #include <linux/sort.h> |
52 | #include <linux/kmod.h> | 51 | #include <linux/kmod.h> |
@@ -104,8 +103,6 @@ static DEFINE_SPINLOCK(cgroup_idr_lock); | |||
104 | */ | 103 | */ |
105 | static DEFINE_SPINLOCK(release_agent_path_lock); | 104 | static DEFINE_SPINLOCK(release_agent_path_lock); |
106 | 105 | ||
107 | struct percpu_rw_semaphore cgroup_threadgroup_rwsem; | ||
108 | |||
109 | #define cgroup_assert_mutex_or_rcu_locked() \ | 106 | #define cgroup_assert_mutex_or_rcu_locked() \ |
110 | RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ | 107 | RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ |
111 | !lockdep_is_held(&cgroup_mutex), \ | 108 | !lockdep_is_held(&cgroup_mutex), \ |
@@ -874,6 +871,48 @@ static struct css_set *find_css_set(struct css_set *old_cset, | |||
874 | return cset; | 871 | return cset; |
875 | } | 872 | } |
876 | 873 | ||
874 | void cgroup_threadgroup_change_begin(struct task_struct *tsk) | ||
875 | { | ||
876 | down_read(&tsk->signal->group_rwsem); | ||
877 | } | ||
878 | |||
879 | void cgroup_threadgroup_change_end(struct task_struct *tsk) | ||
880 | { | ||
881 | up_read(&tsk->signal->group_rwsem); | ||
882 | } | ||
883 | |||
884 | /** | ||
885 | * threadgroup_lock - lock threadgroup | ||
886 | * @tsk: member task of the threadgroup to lock | ||
887 | * | ||
888 | * Lock the threadgroup @tsk belongs to. No new task is allowed to enter | ||
889 | * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or | ||
890 | * change ->group_leader/pid. This is useful for cases where the threadgroup | ||
891 | * needs to stay stable across blockable operations. | ||
892 | * | ||
893 | * fork and exit explicitly call threadgroup_change_{begin|end}() for | ||
894 | * synchronization. While held, no new task will be added to threadgroup | ||
895 | * and no existing live task will have its PF_EXITING set. | ||
896 | * | ||
897 | * de_thread() does threadgroup_change_{begin|end}() when a non-leader | ||
898 | * sub-thread becomes a new leader. | ||
899 | */ | ||
900 | static void threadgroup_lock(struct task_struct *tsk) | ||
901 | { | ||
902 | down_write(&tsk->signal->group_rwsem); | ||
903 | } | ||
904 | |||
905 | /** | ||
906 | * threadgroup_unlock - unlock threadgroup | ||
907 | * @tsk: member task of the threadgroup to unlock | ||
908 | * | ||
909 | * Reverse threadgroup_lock(). | ||
910 | */ | ||
911 | static inline void threadgroup_unlock(struct task_struct *tsk) | ||
912 | { | ||
913 | up_write(&tsk->signal->group_rwsem); | ||
914 | } | ||
915 | |||
877 | static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) | 916 | static struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) |
878 | { | 917 | { |
879 | struct cgroup *root_cgrp = kf_root->kn->priv; | 918 | struct cgroup *root_cgrp = kf_root->kn->priv; |
@@ -2074,9 +2113,9 @@ static void cgroup_task_migrate(struct cgroup *old_cgrp, | |||
2074 | lockdep_assert_held(&css_set_rwsem); | 2113 | lockdep_assert_held(&css_set_rwsem); |
2075 | 2114 | ||
2076 | /* | 2115 | /* |
2077 | * We are synchronized through cgroup_threadgroup_rwsem against | 2116 | * We are synchronized through threadgroup_lock() against PF_EXITING |
2078 | * PF_EXITING setting such that we can't race against cgroup_exit() | 2117 | * setting such that we can't race against cgroup_exit() changing the |
2079 | * changing the css_set to init_css_set and dropping the old one. | 2118 | * css_set to init_css_set and dropping the old one. |
2080 | */ | 2119 | */ |
2081 | WARN_ON_ONCE(tsk->flags & PF_EXITING); | 2120 | WARN_ON_ONCE(tsk->flags & PF_EXITING); |
2082 | old_cset = task_css_set(tsk); | 2121 | old_cset = task_css_set(tsk); |
@@ -2133,11 +2172,10 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) | |||
2133 | * @src_cset and add it to @preloaded_csets, which should later be cleaned | 2172 | * @src_cset and add it to @preloaded_csets, which should later be cleaned |
2134 | * up by cgroup_migrate_finish(). | 2173 | * up by cgroup_migrate_finish(). |
2135 | * | 2174 | * |
2136 | * This function may be called without holding cgroup_threadgroup_rwsem | 2175 | * This function may be called without holding threadgroup_lock even if the |
2137 | * even if the target is a process. Threads may be created and destroyed | 2176 | * target is a process. Threads may be created and destroyed but as long |
2138 | * but as long as cgroup_mutex is not dropped, no new css_set can be put | 2177 | * as cgroup_mutex is not dropped, no new css_set can be put into play and |
2139 | * into play and the preloaded css_sets are guaranteed to cover all | 2178 | * the preloaded css_sets are guaranteed to cover all migrations. |
2140 | * migrations. | ||
2141 | */ | 2179 | */ |
2142 | static void cgroup_migrate_add_src(struct css_set *src_cset, | 2180 | static void cgroup_migrate_add_src(struct css_set *src_cset, |
2143 | struct cgroup *dst_cgrp, | 2181 | struct cgroup *dst_cgrp, |
@@ -2240,7 +2278,7 @@ err: | |||
2240 | * @threadgroup: whether @leader points to the whole process or a single task | 2278 | * @threadgroup: whether @leader points to the whole process or a single task |
2241 | * | 2279 | * |
2242 | * Migrate a process or task denoted by @leader to @cgrp. If migrating a | 2280 | * Migrate a process or task denoted by @leader to @cgrp. If migrating a |
2243 | * process, the caller must be holding cgroup_threadgroup_rwsem. The | 2281 | * process, the caller must be holding threadgroup_lock of @leader. The |
2244 | * caller is also responsible for invoking cgroup_migrate_add_src() and | 2282 | * caller is also responsible for invoking cgroup_migrate_add_src() and |
2245 | * cgroup_migrate_prepare_dst() on the targets before invoking this | 2283 | * cgroup_migrate_prepare_dst() on the targets before invoking this |
2246 | * function and following up with cgroup_migrate_finish(). | 2284 | * function and following up with cgroup_migrate_finish(). |
@@ -2368,7 +2406,7 @@ out_release_tset: | |||
2368 | * @leader: the task or the leader of the threadgroup to be attached | 2406 | * @leader: the task or the leader of the threadgroup to be attached |
2369 | * @threadgroup: attach the whole threadgroup? | 2407 | * @threadgroup: attach the whole threadgroup? |
2370 | * | 2408 | * |
2371 | * Call holding cgroup_mutex and cgroup_threadgroup_rwsem. | 2409 | * Call holding cgroup_mutex and threadgroup_lock of @leader. |
2372 | */ | 2410 | */ |
2373 | static int cgroup_attach_task(struct cgroup *dst_cgrp, | 2411 | static int cgroup_attach_task(struct cgroup *dst_cgrp, |
2374 | struct task_struct *leader, bool threadgroup) | 2412 | struct task_struct *leader, bool threadgroup) |
@@ -2460,13 +2498,14 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, | |||
2460 | if (!cgrp) | 2498 | if (!cgrp) |
2461 | return -ENODEV; | 2499 | return -ENODEV; |
2462 | 2500 | ||
2463 | percpu_down_write(&cgroup_threadgroup_rwsem); | 2501 | retry_find_task: |
2464 | rcu_read_lock(); | 2502 | rcu_read_lock(); |
2465 | if (pid) { | 2503 | if (pid) { |
2466 | tsk = find_task_by_vpid(pid); | 2504 | tsk = find_task_by_vpid(pid); |
2467 | if (!tsk) { | 2505 | if (!tsk) { |
2506 | rcu_read_unlock(); | ||
2468 | ret = -ESRCH; | 2507 | ret = -ESRCH; |
2469 | goto out_unlock_rcu; | 2508 | goto out_unlock_cgroup; |
2470 | } | 2509 | } |
2471 | } else { | 2510 | } else { |
2472 | tsk = current; | 2511 | tsk = current; |
@@ -2482,23 +2521,37 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, | |||
2482 | */ | 2521 | */ |
2483 | if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { | 2522 | if (tsk == kthreadd_task || (tsk->flags & PF_NO_SETAFFINITY)) { |
2484 | ret = -EINVAL; | 2523 | ret = -EINVAL; |
2485 | goto out_unlock_rcu; | 2524 | rcu_read_unlock(); |
2525 | goto out_unlock_cgroup; | ||
2486 | } | 2526 | } |
2487 | 2527 | ||
2488 | get_task_struct(tsk); | 2528 | get_task_struct(tsk); |
2489 | rcu_read_unlock(); | 2529 | rcu_read_unlock(); |
2490 | 2530 | ||
2531 | threadgroup_lock(tsk); | ||
2532 | if (threadgroup) { | ||
2533 | if (!thread_group_leader(tsk)) { | ||
2534 | /* | ||
2535 | * a race with de_thread from another thread's exec() | ||
2536 | * may strip us of our leadership, if this happens, | ||
2537 | * there is no choice but to throw this task away and | ||
2538 | * try again; this is | ||
2539 | * "double-double-toil-and-trouble-check locking". | ||
2540 | */ | ||
2541 | threadgroup_unlock(tsk); | ||
2542 | put_task_struct(tsk); | ||
2543 | goto retry_find_task; | ||
2544 | } | ||
2545 | } | ||
2546 | |||
2491 | ret = cgroup_procs_write_permission(tsk, cgrp, of); | 2547 | ret = cgroup_procs_write_permission(tsk, cgrp, of); |
2492 | if (!ret) | 2548 | if (!ret) |
2493 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); | 2549 | ret = cgroup_attach_task(cgrp, tsk, threadgroup); |
2494 | 2550 | ||
2495 | put_task_struct(tsk); | 2551 | threadgroup_unlock(tsk); |
2496 | goto out_unlock_threadgroup; | ||
2497 | 2552 | ||
2498 | out_unlock_rcu: | 2553 | put_task_struct(tsk); |
2499 | rcu_read_unlock(); | 2554 | out_unlock_cgroup: |
2500 | out_unlock_threadgroup: | ||
2501 | percpu_up_write(&cgroup_threadgroup_rwsem); | ||
2502 | cgroup_kn_unlock(of->kn); | 2555 | cgroup_kn_unlock(of->kn); |
2503 | return ret ?: nbytes; | 2556 | return ret ?: nbytes; |
2504 | } | 2557 | } |
@@ -2643,8 +2696,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2643 | 2696 | ||
2644 | lockdep_assert_held(&cgroup_mutex); | 2697 | lockdep_assert_held(&cgroup_mutex); |
2645 | 2698 | ||
2646 | percpu_down_write(&cgroup_threadgroup_rwsem); | ||
2647 | |||
2648 | /* look up all csses currently attached to @cgrp's subtree */ | 2699 | /* look up all csses currently attached to @cgrp's subtree */ |
2649 | down_read(&css_set_rwsem); | 2700 | down_read(&css_set_rwsem); |
2650 | css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { | 2701 | css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { |
@@ -2700,8 +2751,17 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2700 | goto out_finish; | 2751 | goto out_finish; |
2701 | last_task = task; | 2752 | last_task = task; |
2702 | 2753 | ||
2754 | threadgroup_lock(task); | ||
2755 | /* raced against de_thread() from another thread? */ | ||
2756 | if (!thread_group_leader(task)) { | ||
2757 | threadgroup_unlock(task); | ||
2758 | put_task_struct(task); | ||
2759 | continue; | ||
2760 | } | ||
2761 | |||
2703 | ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); | 2762 | ret = cgroup_migrate(src_cset->dfl_cgrp, task, true); |
2704 | 2763 | ||
2764 | threadgroup_unlock(task); | ||
2705 | put_task_struct(task); | 2765 | put_task_struct(task); |
2706 | 2766 | ||
2707 | if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) | 2767 | if (WARN(ret, "cgroup: failed to update controllers for the default hierarchy (%d), further operations may crash or hang\n", ret)) |
@@ -2711,7 +2771,6 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) | |||
2711 | 2771 | ||
2712 | out_finish: | 2772 | out_finish: |
2713 | cgroup_migrate_finish(&preloaded_csets); | 2773 | cgroup_migrate_finish(&preloaded_csets); |
2714 | percpu_up_write(&cgroup_threadgroup_rwsem); | ||
2715 | return ret; | 2774 | return ret; |
2716 | } | 2775 | } |
2717 | 2776 | ||
@@ -5024,7 +5083,6 @@ int __init cgroup_init(void) | |||
5024 | unsigned long key; | 5083 | unsigned long key; |
5025 | int ssid, err; | 5084 | int ssid, err; |
5026 | 5085 | ||
5027 | BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); | ||
5028 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); | 5086 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); |
5029 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); | 5087 | BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); |
5030 | 5088 | ||
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f548f69c4299..b11756f9b6dc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event) | |||
1243 | PERF_EVENT_STATE_INACTIVE; | 1243 | PERF_EVENT_STATE_INACTIVE; |
1244 | } | 1244 | } |
1245 | 1245 | ||
1246 | /* | 1246 | static void __perf_event_read_size(struct perf_event *event, int nr_siblings) |
1247 | * Called at perf_event creation and when events are attached/detached from a | ||
1248 | * group. | ||
1249 | */ | ||
1250 | static void perf_event__read_size(struct perf_event *event) | ||
1251 | { | 1247 | { |
1252 | int entry = sizeof(u64); /* value */ | 1248 | int entry = sizeof(u64); /* value */ |
1253 | int size = 0; | 1249 | int size = 0; |
@@ -1263,7 +1259,7 @@ static void perf_event__read_size(struct perf_event *event) | |||
1263 | entry += sizeof(u64); | 1259 | entry += sizeof(u64); |
1264 | 1260 | ||
1265 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | 1261 | if (event->attr.read_format & PERF_FORMAT_GROUP) { |
1266 | nr += event->group_leader->nr_siblings; | 1262 | nr += nr_siblings; |
1267 | size += sizeof(u64); | 1263 | size += sizeof(u64); |
1268 | } | 1264 | } |
1269 | 1265 | ||
@@ -1271,14 +1267,11 @@ static void perf_event__read_size(struct perf_event *event) | |||
1271 | event->read_size = size; | 1267 | event->read_size = size; |
1272 | } | 1268 | } |
1273 | 1269 | ||
1274 | static void perf_event__header_size(struct perf_event *event) | 1270 | static void __perf_event_header_size(struct perf_event *event, u64 sample_type) |
1275 | { | 1271 | { |
1276 | struct perf_sample_data *data; | 1272 | struct perf_sample_data *data; |
1277 | u64 sample_type = event->attr.sample_type; | ||
1278 | u16 size = 0; | 1273 | u16 size = 0; |
1279 | 1274 | ||
1280 | perf_event__read_size(event); | ||
1281 | |||
1282 | if (sample_type & PERF_SAMPLE_IP) | 1275 | if (sample_type & PERF_SAMPLE_IP) |
1283 | size += sizeof(data->ip); | 1276 | size += sizeof(data->ip); |
1284 | 1277 | ||
@@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event) | |||
1303 | event->header_size = size; | 1296 | event->header_size = size; |
1304 | } | 1297 | } |
1305 | 1298 | ||
1299 | /* | ||
1300 | * Called at perf_event creation and when events are attached/detached from a | ||
1301 | * group. | ||
1302 | */ | ||
1303 | static void perf_event__header_size(struct perf_event *event) | ||
1304 | { | ||
1305 | __perf_event_read_size(event, | ||
1306 | event->group_leader->nr_siblings); | ||
1307 | __perf_event_header_size(event, event->attr.sample_type); | ||
1308 | } | ||
1309 | |||
1306 | static void perf_event__id_header_size(struct perf_event *event) | 1310 | static void perf_event__id_header_size(struct perf_event *event) |
1307 | { | 1311 | { |
1308 | struct perf_sample_data *data; | 1312 | struct perf_sample_data *data; |
@@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event) | |||
1330 | event->id_header_size = size; | 1334 | event->id_header_size = size; |
1331 | } | 1335 | } |
1332 | 1336 | ||
1337 | static bool perf_event_validate_size(struct perf_event *event) | ||
1338 | { | ||
1339 | /* | ||
1340 | * The values computed here will be over-written when we actually | ||
1341 | * attach the event. | ||
1342 | */ | ||
1343 | __perf_event_read_size(event, event->group_leader->nr_siblings + 1); | ||
1344 | __perf_event_header_size(event, event->attr.sample_type & ~PERF_SAMPLE_READ); | ||
1345 | perf_event__id_header_size(event); | ||
1346 | |||
1347 | /* | ||
1348 | * Sum the lot; should not exceed the 64k limit we have on records. | ||
1349 | * Conservative limit to allow for callchains and other variable fields. | ||
1350 | */ | ||
1351 | if (event->read_size + event->header_size + | ||
1352 | event->id_header_size + sizeof(struct perf_event_header) >= 16*1024) | ||
1353 | return false; | ||
1354 | |||
1355 | return true; | ||
1356 | } | ||
1357 | |||
1333 | static void perf_group_attach(struct perf_event *event) | 1358 | static void perf_group_attach(struct perf_event *event) |
1334 | { | 1359 | { |
1335 | struct perf_event *group_leader = event->group_leader, *pos; | 1360 | struct perf_event *group_leader = event->group_leader, *pos; |
@@ -8297,13 +8322,35 @@ SYSCALL_DEFINE5(perf_event_open, | |||
8297 | 8322 | ||
8298 | if (move_group) { | 8323 | if (move_group) { |
8299 | gctx = group_leader->ctx; | 8324 | gctx = group_leader->ctx; |
8325 | mutex_lock_double(&gctx->mutex, &ctx->mutex); | ||
8326 | } else { | ||
8327 | mutex_lock(&ctx->mutex); | ||
8328 | } | ||
8300 | 8329 | ||
8330 | if (!perf_event_validate_size(event)) { | ||
8331 | err = -E2BIG; | ||
8332 | goto err_locked; | ||
8333 | } | ||
8334 | |||
8335 | /* | ||
8336 | * Must be under the same ctx::mutex as perf_install_in_context(), | ||
8337 | * because we need to serialize with concurrent event creation. | ||
8338 | */ | ||
8339 | if (!exclusive_event_installable(event, ctx)) { | ||
8340 | /* exclusive and group stuff are assumed mutually exclusive */ | ||
8341 | WARN_ON_ONCE(move_group); | ||
8342 | |||
8343 | err = -EBUSY; | ||
8344 | goto err_locked; | ||
8345 | } | ||
8346 | |||
8347 | WARN_ON_ONCE(ctx->parent_ctx); | ||
8348 | |||
8349 | if (move_group) { | ||
8301 | /* | 8350 | /* |
8302 | * See perf_event_ctx_lock() for comments on the details | 8351 | * See perf_event_ctx_lock() for comments on the details |
8303 | * of swizzling perf_event::ctx. | 8352 | * of swizzling perf_event::ctx. |
8304 | */ | 8353 | */ |
8305 | mutex_lock_double(&gctx->mutex, &ctx->mutex); | ||
8306 | |||
8307 | perf_remove_from_context(group_leader, false); | 8354 | perf_remove_from_context(group_leader, false); |
8308 | 8355 | ||
8309 | list_for_each_entry(sibling, &group_leader->sibling_list, | 8356 | list_for_each_entry(sibling, &group_leader->sibling_list, |
@@ -8311,13 +8358,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
8311 | perf_remove_from_context(sibling, false); | 8358 | perf_remove_from_context(sibling, false); |
8312 | put_ctx(gctx); | 8359 | put_ctx(gctx); |
8313 | } | 8360 | } |
8314 | } else { | ||
8315 | mutex_lock(&ctx->mutex); | ||
8316 | } | ||
8317 | 8361 | ||
8318 | WARN_ON_ONCE(ctx->parent_ctx); | ||
8319 | |||
8320 | if (move_group) { | ||
8321 | /* | 8362 | /* |
8322 | * Wait for everybody to stop referencing the events through | 8363 | * Wait for everybody to stop referencing the events through |
8323 | * the old lists, before installing it on new lists. | 8364 | * the old lists, before installing it on new lists. |
@@ -8349,22 +8390,29 @@ SYSCALL_DEFINE5(perf_event_open, | |||
8349 | perf_event__state_init(group_leader); | 8390 | perf_event__state_init(group_leader); |
8350 | perf_install_in_context(ctx, group_leader, group_leader->cpu); | 8391 | perf_install_in_context(ctx, group_leader, group_leader->cpu); |
8351 | get_ctx(ctx); | 8392 | get_ctx(ctx); |
8352 | } | ||
8353 | 8393 | ||
8354 | if (!exclusive_event_installable(event, ctx)) { | 8394 | /* |
8355 | err = -EBUSY; | 8395 | * Now that all events are installed in @ctx, nothing |
8356 | mutex_unlock(&ctx->mutex); | 8396 | * references @gctx anymore, so drop the last reference we have |
8357 | fput(event_file); | 8397 | * on it. |
8358 | goto err_context; | 8398 | */ |
8399 | put_ctx(gctx); | ||
8359 | } | 8400 | } |
8360 | 8401 | ||
8402 | /* | ||
8403 | * Precalculate sample_data sizes; do while holding ctx::mutex such | ||
8404 | * that we're serialized against further additions and before | ||
8405 | * perf_install_in_context() which is the point the event is active and | ||
8406 | * can use these values. | ||
8407 | */ | ||
8408 | perf_event__header_size(event); | ||
8409 | perf_event__id_header_size(event); | ||
8410 | |||
8361 | perf_install_in_context(ctx, event, event->cpu); | 8411 | perf_install_in_context(ctx, event, event->cpu); |
8362 | perf_unpin_context(ctx); | 8412 | perf_unpin_context(ctx); |
8363 | 8413 | ||
8364 | if (move_group) { | 8414 | if (move_group) |
8365 | mutex_unlock(&gctx->mutex); | 8415 | mutex_unlock(&gctx->mutex); |
8366 | put_ctx(gctx); | ||
8367 | } | ||
8368 | mutex_unlock(&ctx->mutex); | 8416 | mutex_unlock(&ctx->mutex); |
8369 | 8417 | ||
8370 | put_online_cpus(); | 8418 | put_online_cpus(); |
@@ -8376,12 +8424,6 @@ SYSCALL_DEFINE5(perf_event_open, | |||
8376 | mutex_unlock(¤t->perf_event_mutex); | 8424 | mutex_unlock(¤t->perf_event_mutex); |
8377 | 8425 | ||
8378 | /* | 8426 | /* |
8379 | * Precalculate sample_data sizes | ||
8380 | */ | ||
8381 | perf_event__header_size(event); | ||
8382 | perf_event__id_header_size(event); | ||
8383 | |||
8384 | /* | ||
8385 | * Drop the reference on the group_event after placing the | 8427 | * Drop the reference on the group_event after placing the |
8386 | * new event on the sibling_list. This ensures destruction | 8428 | * new event on the sibling_list. This ensures destruction |
8387 | * of the group leader will find the pointer to itself in | 8429 | * of the group leader will find the pointer to itself in |
@@ -8391,6 +8433,12 @@ SYSCALL_DEFINE5(perf_event_open, | |||
8391 | fd_install(event_fd, event_file); | 8433 | fd_install(event_fd, event_file); |
8392 | return event_fd; | 8434 | return event_fd; |
8393 | 8435 | ||
8436 | err_locked: | ||
8437 | if (move_group) | ||
8438 | mutex_unlock(&gctx->mutex); | ||
8439 | mutex_unlock(&ctx->mutex); | ||
8440 | /* err_file: */ | ||
8441 | fput(event_file); | ||
8394 | err_context: | 8442 | err_context: |
8395 | perf_unpin_context(ctx); | 8443 | perf_unpin_context(ctx); |
8396 | put_ctx(ctx); | 8444 | put_ctx(ctx); |
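
perf_event_validate_size() above pre-computes read_size as if the new sibling were already attached (nr_siblings + 1) and fails the open with -E2BIG when the summed record sizes could approach the record limit. The small program below redoes that arithmetic in userspace so the effect of PERF_FORMAT_GROUP on large groups is easy to see; the layout is reconstructed from the hunk above plus the perf_event_open(2) ABI and only covers the read_size term of the kernel's conservative 16 KiB check, so treat it as a sketch.

    #include <stdint.h>
    #include <stdio.h>

    /* Subset of the perf read_format bits, from the perf_event_open(2) ABI. */
    #define PERF_FORMAT_TOTAL_TIME_ENABLED  (1ULL << 0)
    #define PERF_FORMAT_TOTAL_TIME_RUNNING  (1ULL << 1)
    #define PERF_FORMAT_ID                  (1ULL << 2)
    #define PERF_FORMAT_GROUP               (1ULL << 3)

    /* Mirrors __perf_event_read_size(): bytes one read()/PERF_SAMPLE_READ emits. */
    static uint64_t read_size(uint64_t read_format, int nr_siblings)
    {
            uint64_t entry = sizeof(uint64_t);      /* value */
            uint64_t size = 0;
            int nr = 1;

            if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
                    size += sizeof(uint64_t);
            if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
                    size += sizeof(uint64_t);
            if (read_format & PERF_FORMAT_ID)
                    entry += sizeof(uint64_t);
            if (read_format & PERF_FORMAT_GROUP) {
                    nr += nr_siblings;              /* one entry per group member */
                    size += sizeof(uint64_t);       /* the leading "nr" field */
            }
            return size + nr * entry;
    }

    int main(void)
    {
            uint64_t fmt = PERF_FORMAT_GROUP | PERF_FORMAT_ID;

            for (int siblings = 0; siblings <= 2048; siblings += 512) {
                    uint64_t sz = read_size(fmt, siblings);

                    printf("%4d siblings -> read_size %6llu bytes (%s the 16 KiB cap)\n",
                           siblings, (unsigned long long)sz,
                           sz >= 16 * 1024 ? "already over" : "still under");
            }
            return 0;
    }
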
diff --git a/kernel/fork.c b/kernel/fork.c
index 7d5f0f118a63..2845623fb582 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1149,6 +1149,10 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
1149 | tty_audit_fork(sig); | 1149 | tty_audit_fork(sig); |
1150 | sched_autogroup_fork(sig); | 1150 | sched_autogroup_fork(sig); |
1151 | 1151 | ||
1152 | #ifdef CONFIG_CGROUPS | ||
1153 | init_rwsem(&sig->group_rwsem); | ||
1154 | #endif | ||
1155 | |||
1152 | sig->oom_score_adj = current->signal->oom_score_adj; | 1156 | sig->oom_score_adj = current->signal->oom_score_adj; |
1153 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; | 1157 | sig->oom_score_adj_min = current->signal->oom_score_adj_min; |
1154 | 1158 | ||
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 6e40a9539763..e28169dd1c36 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -83,7 +83,7 @@ int irq_set_handler_data(unsigned int irq, void *data) | |||
83 | 83 | ||
84 | if (!desc) | 84 | if (!desc) |
85 | return -EINVAL; | 85 | return -EINVAL; |
86 | desc->irq_data.handler_data = data; | 86 | desc->irq_common_data.handler_data = data; |
87 | irq_put_desc_unlock(desc, flags); | 87 | irq_put_desc_unlock(desc, flags); |
88 | return 0; | 88 | return 0; |
89 | } | 89 | } |
@@ -105,7 +105,7 @@ int irq_set_msi_desc_off(unsigned int irq_base, unsigned int irq_offset, | |||
105 | 105 | ||
106 | if (!desc) | 106 | if (!desc) |
107 | return -EINVAL; | 107 | return -EINVAL; |
108 | desc->irq_data.msi_desc = entry; | 108 | desc->irq_common_data.msi_desc = entry; |
109 | if (entry && !irq_offset) | 109 | if (entry && !irq_offset) |
110 | entry->irq = irq_base; | 110 | entry->irq = irq_base; |
111 | irq_put_desc_unlock(desc, flags); | 111 | irq_put_desc_unlock(desc, flags); |
@@ -372,7 +372,6 @@ static bool irq_may_run(struct irq_desc *desc) | |||
372 | 372 | ||
373 | /** | 373 | /** |
374 | * handle_simple_irq - Simple and software-decoded IRQs. | 374 | * handle_simple_irq - Simple and software-decoded IRQs. |
375 | * @irq: the interrupt number | ||
376 | * @desc: the interrupt description structure for this irq | 375 | * @desc: the interrupt description structure for this irq |
377 | * | 376 | * |
378 | * Simple interrupts are either sent from a demultiplexing interrupt | 377 | * Simple interrupts are either sent from a demultiplexing interrupt |
@@ -382,8 +381,7 @@ static bool irq_may_run(struct irq_desc *desc) | |||
382 | * Note: The caller is expected to handle the ack, clear, mask and | 381 | * Note: The caller is expected to handle the ack, clear, mask and |
383 | * unmask issues if necessary. | 382 | * unmask issues if necessary. |
384 | */ | 383 | */ |
385 | void | 384 | void handle_simple_irq(struct irq_desc *desc) |
386 | handle_simple_irq(unsigned int irq, struct irq_desc *desc) | ||
387 | { | 385 | { |
388 | raw_spin_lock(&desc->lock); | 386 | raw_spin_lock(&desc->lock); |
389 | 387 | ||
@@ -425,7 +423,6 @@ static void cond_unmask_irq(struct irq_desc *desc) | |||
425 | 423 | ||
426 | /** | 424 | /** |
427 | * handle_level_irq - Level type irq handler | 425 | * handle_level_irq - Level type irq handler |
428 | * @irq: the interrupt number | ||
429 | * @desc: the interrupt description structure for this irq | 426 | * @desc: the interrupt description structure for this irq |
430 | * | 427 | * |
431 | * Level type interrupts are active as long as the hardware line has | 428 | * Level type interrupts are active as long as the hardware line has |
@@ -433,8 +430,7 @@ static void cond_unmask_irq(struct irq_desc *desc) | |||
433 | * it after the associated handler has acknowledged the device, so the | 430 | * it after the associated handler has acknowledged the device, so the |
434 | * interrupt line is back to inactive. | 431 | * interrupt line is back to inactive. |
435 | */ | 432 | */ |
436 | void | 433 | void handle_level_irq(struct irq_desc *desc) |
437 | handle_level_irq(unsigned int irq, struct irq_desc *desc) | ||
438 | { | 434 | { |
439 | raw_spin_lock(&desc->lock); | 435 | raw_spin_lock(&desc->lock); |
440 | mask_ack_irq(desc); | 436 | mask_ack_irq(desc); |
@@ -496,7 +492,6 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) | |||
496 | 492 | ||
497 | /** | 493 | /** |
498 | * handle_fasteoi_irq - irq handler for transparent controllers | 494 | * handle_fasteoi_irq - irq handler for transparent controllers |
499 | * @irq: the interrupt number | ||
500 | * @desc: the interrupt description structure for this irq | 495 | * @desc: the interrupt description structure for this irq |
501 | * | 496 | * |
502 | * Only a single callback will be issued to the chip: an ->eoi() | 497 | * Only a single callback will be issued to the chip: an ->eoi() |
@@ -504,8 +499,7 @@ static void cond_unmask_eoi_irq(struct irq_desc *desc, struct irq_chip *chip) | |||
504 | * for modern forms of interrupt handlers, which handle the flow | 499 | * for modern forms of interrupt handlers, which handle the flow |
505 | * details in hardware, transparently. | 500 | * details in hardware, transparently. |
506 | */ | 501 | */ |
507 | void | 502 | void handle_fasteoi_irq(struct irq_desc *desc) |
508 | handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc) | ||
509 | { | 503 | { |
510 | struct irq_chip *chip = desc->irq_data.chip; | 504 | struct irq_chip *chip = desc->irq_data.chip; |
511 | 505 | ||
@@ -546,7 +540,6 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq); | |||
546 | 540 | ||
547 | /** | 541 | /** |
548 | * handle_edge_irq - edge type IRQ handler | 542 | * handle_edge_irq - edge type IRQ handler |
549 | * @irq: the interrupt number | ||
550 | * @desc: the interrupt description structure for this irq | 543 | * @desc: the interrupt description structure for this irq |
551 | * | 544 | * |
552 | * Interrupt occures on the falling and/or rising edge of a hardware | 545 | * Interrupt occures on the falling and/or rising edge of a hardware |
@@ -560,8 +553,7 @@ EXPORT_SYMBOL_GPL(handle_fasteoi_irq); | |||
560 | * the handler was running. If all pending interrupts are handled, the | 553 | * the handler was running. If all pending interrupts are handled, the |
561 | * loop is left. | 554 | * loop is left. |
562 | */ | 555 | */ |
563 | void | 556 | void handle_edge_irq(struct irq_desc *desc) |
564 | handle_edge_irq(unsigned int irq, struct irq_desc *desc) | ||
565 | { | 557 | { |
566 | raw_spin_lock(&desc->lock); | 558 | raw_spin_lock(&desc->lock); |
567 | 559 | ||
@@ -618,13 +610,12 @@ EXPORT_SYMBOL(handle_edge_irq); | |||
618 | #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER | 610 | #ifdef CONFIG_IRQ_EDGE_EOI_HANDLER |
619 | /** | 611 | /** |
620 | * handle_edge_eoi_irq - edge eoi type IRQ handler | 612 | * handle_edge_eoi_irq - edge eoi type IRQ handler |
621 | * @irq: the interrupt number | ||
622 | * @desc: the interrupt description structure for this irq | 613 | * @desc: the interrupt description structure for this irq |
623 | * | 614 | * |
624 | * Similar as the above handle_edge_irq, but using eoi and w/o the | 615 | * Similar as the above handle_edge_irq, but using eoi and w/o the |
625 | * mask/unmask logic. | 616 | * mask/unmask logic. |
626 | */ | 617 | */ |
627 | void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc) | 618 | void handle_edge_eoi_irq(struct irq_desc *desc) |
628 | { | 619 | { |
629 | struct irq_chip *chip = irq_desc_get_chip(desc); | 620 | struct irq_chip *chip = irq_desc_get_chip(desc); |
630 | 621 | ||
@@ -665,13 +656,11 @@ out_eoi: | |||
665 | 656 | ||
666 | /** | 657 | /** |
667 | * handle_percpu_irq - Per CPU local irq handler | 658 | * handle_percpu_irq - Per CPU local irq handler |
668 | * @irq: the interrupt number | ||
669 | * @desc: the interrupt description structure for this irq | 659 | * @desc: the interrupt description structure for this irq |
670 | * | 660 | * |
671 | * Per CPU interrupts on SMP machines without locking requirements | 661 | * Per CPU interrupts on SMP machines without locking requirements |
672 | */ | 662 | */ |
673 | void | 663 | void handle_percpu_irq(struct irq_desc *desc) |
674 | handle_percpu_irq(unsigned int irq, struct irq_desc *desc) | ||
675 | { | 664 | { |
676 | struct irq_chip *chip = irq_desc_get_chip(desc); | 665 | struct irq_chip *chip = irq_desc_get_chip(desc); |
677 | 666 | ||
@@ -688,7 +677,6 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) | |||
688 | 677 | ||
689 | /** | 678 | /** |
690 | * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids | 679 | * handle_percpu_devid_irq - Per CPU local irq handler with per cpu dev ids |
691 | * @irq: the interrupt number | ||
692 | * @desc: the interrupt description structure for this irq | 680 | * @desc: the interrupt description structure for this irq |
693 | * | 681 | * |
694 | * Per CPU interrupts on SMP machines without locking requirements. Same as | 682 | * Per CPU interrupts on SMP machines without locking requirements. Same as |
@@ -698,11 +686,12 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) | |||
698 | * contain the real device id for the cpu on which this handler is | 686 | * contain the real device id for the cpu on which this handler is |
699 | * called | 687 | * called |
700 | */ | 688 | */ |
701 | void handle_percpu_devid_irq(unsigned int irq, struct irq_desc *desc) | 689 | void handle_percpu_devid_irq(struct irq_desc *desc) |
702 | { | 690 | { |
703 | struct irq_chip *chip = irq_desc_get_chip(desc); | 691 | struct irq_chip *chip = irq_desc_get_chip(desc); |
704 | struct irqaction *action = desc->action; | 692 | struct irqaction *action = desc->action; |
705 | void *dev_id = raw_cpu_ptr(action->percpu_dev_id); | 693 | void *dev_id = raw_cpu_ptr(action->percpu_dev_id); |
694 | unsigned int irq = irq_desc_get_irq(desc); | ||
706 | irqreturn_t res; | 695 | irqreturn_t res; |
707 | 696 | ||
708 | kstat_incr_irqs_this_cpu(desc); | 697 | kstat_incr_irqs_this_cpu(desc); |
@@ -796,7 +785,7 @@ irq_set_chained_handler_and_data(unsigned int irq, irq_flow_handler_t handle, | |||
796 | return; | 785 | return; |
797 | 786 | ||
798 | __irq_do_set_handler(desc, handle, 1, NULL); | 787 | __irq_do_set_handler(desc, handle, 1, NULL); |
799 | desc->irq_data.handler_data = data; | 788 | desc->irq_common_data.handler_data = data; |
800 | 789 | ||
801 | irq_put_desc_busunlock(desc, flags); | 790 | irq_put_desc_busunlock(desc, flags); |
802 | } | 791 | } |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index b6eeea8a80c5..e25a83b67cce 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -22,17 +22,19 @@ | |||
22 | 22 | ||
23 | /** | 23 | /** |
24 | * handle_bad_irq - handle spurious and unhandled irqs | 24 | * handle_bad_irq - handle spurious and unhandled irqs |
25 | * @irq: the interrupt number | ||
26 | * @desc: description of the interrupt | 25 | * @desc: description of the interrupt |
27 | * | 26 | * |
28 | * Handles spurious and unhandled IRQ's. It also prints a debugmessage. | 27 | * Handles spurious and unhandled IRQ's. It also prints a debugmessage. |
29 | */ | 28 | */ |
30 | void handle_bad_irq(unsigned int irq, struct irq_desc *desc) | 29 | void handle_bad_irq(struct irq_desc *desc) |
31 | { | 30 | { |
31 | unsigned int irq = irq_desc_get_irq(desc); | ||
32 | |||
32 | print_irq_desc(irq, desc); | 33 | print_irq_desc(irq, desc); |
33 | kstat_incr_irqs_this_cpu(desc); | 34 | kstat_incr_irqs_this_cpu(desc); |
34 | ack_bad_irq(irq); | 35 | ack_bad_irq(irq); |
35 | } | 36 | } |
37 | EXPORT_SYMBOL_GPL(handle_bad_irq); | ||
36 | 38 | ||
37 | /* | 39 | /* |
38 | * Special, empty irq handler: | 40 | * Special, empty irq handler: |
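
The chip.c and handle.c hunks above are part of the tree-wide change of the flow-handler prototype: handlers now take only the struct irq_desc pointer, and the interrupt number, where it is still wanted, is fetched via irq_desc_get_irq(). A driver-side demultiplex handler would be converted roughly as sketched below; foo_demux_handler and its pr_debug message are hypothetical, only the prototype and the helpers come from the kernel.

    #include <linux/irq.h>
    #include <linux/irqdesc.h>
    #include <linux/irqchip/chained_irq.h>
    #include <linux/printk.h>

    /* Before: static void foo_demux_handler(unsigned int irq, struct irq_desc *desc) */
    static void foo_demux_handler(struct irq_desc *desc)
    {
            struct irq_chip *chip = irq_desc_get_chip(desc);
            unsigned int irq = irq_desc_get_irq(desc);  /* replaces the dropped argument */

            chained_irq_enter(chip, desc);
            pr_debug("demultiplexing parent irq %u\n", irq);
            /* ... read the hardware status and generic_handle_irq() each child ... */
            chained_irq_exit(chip, desc);
    }

Such a handler would still be installed with irq_set_chained_handler_and_data(); note in the chip.c hunk that the handler_data it stores has likewise moved to irq_common_data.
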
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index eee4b385cffb..5ef0c2dbe930 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -194,7 +194,7 @@ static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc) | |||
194 | 194 | ||
195 | static inline int irq_desc_get_node(struct irq_desc *desc) | 195 | static inline int irq_desc_get_node(struct irq_desc *desc) |
196 | { | 196 | { |
197 | return irq_data_get_node(&desc->irq_data); | 197 | return irq_common_data_get_node(&desc->irq_common_data); |
198 | } | 198 | } |
199 | 199 | ||
200 | #ifdef CONFIG_PM_SLEEP | 200 | #ifdef CONFIG_PM_SLEEP |
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 0a2a4b697bcb..239e2ae2c947 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -38,12 +38,13 @@ static void __init init_irq_default_affinity(void) | |||
38 | #ifdef CONFIG_SMP | 38 | #ifdef CONFIG_SMP |
39 | static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) | 39 | static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) |
40 | { | 40 | { |
41 | if (!zalloc_cpumask_var_node(&desc->irq_data.affinity, gfp, node)) | 41 | if (!zalloc_cpumask_var_node(&desc->irq_common_data.affinity, |
42 | gfp, node)) | ||
42 | return -ENOMEM; | 43 | return -ENOMEM; |
43 | 44 | ||
44 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 45 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
45 | if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { | 46 | if (!zalloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { |
46 | free_cpumask_var(desc->irq_data.affinity); | 47 | free_cpumask_var(desc->irq_common_data.affinity); |
47 | return -ENOMEM; | 48 | return -ENOMEM; |
48 | } | 49 | } |
49 | #endif | 50 | #endif |
@@ -52,11 +53,13 @@ static int alloc_masks(struct irq_desc *desc, gfp_t gfp, int node) | |||
52 | 53 | ||
53 | static void desc_smp_init(struct irq_desc *desc, int node) | 54 | static void desc_smp_init(struct irq_desc *desc, int node) |
54 | { | 55 | { |
55 | desc->irq_data.node = node; | 56 | cpumask_copy(desc->irq_common_data.affinity, irq_default_affinity); |
56 | cpumask_copy(desc->irq_data.affinity, irq_default_affinity); | ||
57 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 57 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
58 | cpumask_clear(desc->pending_mask); | 58 | cpumask_clear(desc->pending_mask); |
59 | #endif | 59 | #endif |
60 | #ifdef CONFIG_NUMA | ||
61 | desc->irq_common_data.node = node; | ||
62 | #endif | ||
60 | } | 63 | } |
61 | 64 | ||
62 | #else | 65 | #else |
@@ -70,12 +73,13 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, | |||
70 | { | 73 | { |
71 | int cpu; | 74 | int cpu; |
72 | 75 | ||
76 | desc->irq_common_data.handler_data = NULL; | ||
77 | desc->irq_common_data.msi_desc = NULL; | ||
78 | |||
73 | desc->irq_data.common = &desc->irq_common_data; | 79 | desc->irq_data.common = &desc->irq_common_data; |
74 | desc->irq_data.irq = irq; | 80 | desc->irq_data.irq = irq; |
75 | desc->irq_data.chip = &no_irq_chip; | 81 | desc->irq_data.chip = &no_irq_chip; |
76 | desc->irq_data.chip_data = NULL; | 82 | desc->irq_data.chip_data = NULL; |
77 | desc->irq_data.handler_data = NULL; | ||
78 | desc->irq_data.msi_desc = NULL; | ||
79 | irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); | 83 | irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS); |
80 | irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); | 84 | irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED); |
81 | desc->handle_irq = handle_bad_irq; | 85 | desc->handle_irq = handle_bad_irq; |
@@ -121,7 +125,7 @@ static void free_masks(struct irq_desc *desc) | |||
121 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 125 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
122 | free_cpumask_var(desc->pending_mask); | 126 | free_cpumask_var(desc->pending_mask); |
123 | #endif | 127 | #endif |
124 | free_cpumask_var(desc->irq_data.affinity); | 128 | free_cpumask_var(desc->irq_common_data.affinity); |
125 | } | 129 | } |
126 | #else | 130 | #else |
127 | static inline void free_masks(struct irq_desc *desc) { } | 131 | static inline void free_masks(struct irq_desc *desc) { } |
@@ -343,7 +347,7 @@ int generic_handle_irq(unsigned int irq) | |||
343 | 347 | ||
344 | if (!desc) | 348 | if (!desc) |
345 | return -EINVAL; | 349 | return -EINVAL; |
346 | generic_handle_irq_desc(irq, desc); | 350 | generic_handle_irq_desc(desc); |
347 | return 0; | 351 | return 0; |
348 | } | 352 | } |
349 | EXPORT_SYMBOL_GPL(generic_handle_irq); | 353 | EXPORT_SYMBOL_GPL(generic_handle_irq); |
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 79baaf8a7813..dc9d27c0c158 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -844,7 +844,6 @@ static struct irq_data *irq_domain_insert_irq_data(struct irq_domain *domain, | |||
844 | child->parent_data = irq_data; | 844 | child->parent_data = irq_data; |
845 | irq_data->irq = child->irq; | 845 | irq_data->irq = child->irq; |
846 | irq_data->common = child->common; | 846 | irq_data->common = child->common; |
847 | irq_data->node = child->node; | ||
848 | irq_data->domain = domain; | 847 | irq_data->domain = domain; |
849 | } | 848 | } |
850 | 849 | ||
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index ad1b064f94fe..f9a59f6cabd2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -192,7 +192,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, | |||
192 | switch (ret) { | 192 | switch (ret) { |
193 | case IRQ_SET_MASK_OK: | 193 | case IRQ_SET_MASK_OK: |
194 | case IRQ_SET_MASK_OK_DONE: | 194 | case IRQ_SET_MASK_OK_DONE: |
195 | cpumask_copy(data->affinity, mask); | 195 | cpumask_copy(desc->irq_common_data.affinity, mask); |
196 | case IRQ_SET_MASK_OK_NOCOPY: | 196 | case IRQ_SET_MASK_OK_NOCOPY: |
197 | irq_set_thread_affinity(desc); | 197 | irq_set_thread_affinity(desc); |
198 | ret = 0; | 198 | ret = 0; |
@@ -304,7 +304,7 @@ static void irq_affinity_notify(struct work_struct *work) | |||
304 | if (irq_move_pending(&desc->irq_data)) | 304 | if (irq_move_pending(&desc->irq_data)) |
305 | irq_get_pending(cpumask, desc); | 305 | irq_get_pending(cpumask, desc); |
306 | else | 306 | else |
307 | cpumask_copy(cpumask, desc->irq_data.affinity); | 307 | cpumask_copy(cpumask, desc->irq_common_data.affinity); |
308 | raw_spin_unlock_irqrestore(&desc->lock, flags); | 308 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
309 | 309 | ||
310 | notify->notify(notify, cpumask); | 310 | notify->notify(notify, cpumask); |
@@ -375,9 +375,9 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) | |||
375 | * one of the targets is online. | 375 | * one of the targets is online. |
376 | */ | 376 | */ |
377 | if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { | 377 | if (irqd_has_set(&desc->irq_data, IRQD_AFFINITY_SET)) { |
378 | if (cpumask_intersects(desc->irq_data.affinity, | 378 | if (cpumask_intersects(desc->irq_common_data.affinity, |
379 | cpu_online_mask)) | 379 | cpu_online_mask)) |
380 | set = desc->irq_data.affinity; | 380 | set = desc->irq_common_data.affinity; |
381 | else | 381 | else |
382 | irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); | 382 | irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET); |
383 | } | 383 | } |
@@ -829,8 +829,8 @@ irq_thread_check_affinity(struct irq_desc *desc, struct irqaction *action) | |||
829 | * This code is triggered unconditionally. Check the affinity | 829 | * This code is triggered unconditionally. Check the affinity |
830 | * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. | 830 | * mask pointer. For CPU_MASK_OFFSTACK=n this is optimized out. |
831 | */ | 831 | */ |
832 | if (desc->irq_data.affinity) | 832 | if (desc->irq_common_data.affinity) |
833 | cpumask_copy(mask, desc->irq_data.affinity); | 833 | cpumask_copy(mask, desc->irq_common_data.affinity); |
834 | else | 834 | else |
835 | valid = false; | 835 | valid = false; |
836 | raw_spin_unlock_irq(&desc->lock); | 836 | raw_spin_unlock_irq(&desc->lock); |
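
The irqdesc.c, manage.c and proc.c hunks all apply the same mechanical rule: the affinity mask, handler_data and msi_desc now live in struct irq_common_data, shared across the irq_data hierarchy, instead of in each struct irq_data. Core code reaches them as desc->irq_common_data.affinity, as above; code that only holds an irq_data pointer goes through the new common link, roughly as in this sketch (my_show_affinity is hypothetical, not part of the diff):

    #include <linux/cpumask.h>
    #include <linux/irq.h>
    #include <linux/printk.h>

    /* Sketch: dump the affinity reachable from an irq_data after the field move. */
    static void my_show_affinity(struct irq_data *d)
    {
            /* before this series: d->affinity */
            pr_info("irq %u affinity: %*pbl\n", d->irq,
                    cpumask_pr_args(d->common->affinity));
    }
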
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 7e6512b9dc1f..be9149f62eb8 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -228,11 +228,7 @@ static void msi_domain_update_chip_ops(struct msi_domain_info *info) | |||
228 | { | 228 | { |
229 | struct irq_chip *chip = info->chip; | 229 | struct irq_chip *chip = info->chip; |
230 | 230 | ||
231 | BUG_ON(!chip); | 231 | BUG_ON(!chip || !chip->irq_mask || !chip->irq_unmask); |
232 | if (!chip->irq_mask) | ||
233 | chip->irq_mask = pci_msi_mask_irq; | ||
234 | if (!chip->irq_unmask) | ||
235 | chip->irq_unmask = pci_msi_unmask_irq; | ||
236 | if (!chip->irq_set_affinity) | 232 | if (!chip->irq_set_affinity) |
237 | chip->irq_set_affinity = msi_domain_set_affinity; | 233 | chip->irq_set_affinity = msi_domain_set_affinity; |
238 | } | 234 | } |
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 0e97c142ce40..a50ddc9417ff 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/seq_file.h> | 12 | #include <linux/seq_file.h> |
13 | #include <linux/interrupt.h> | 13 | #include <linux/interrupt.h> |
14 | #include <linux/kernel_stat.h> | 14 | #include <linux/kernel_stat.h> |
15 | #include <linux/mutex.h> | ||
15 | 16 | ||
16 | #include "internals.h" | 17 | #include "internals.h" |
17 | 18 | ||
@@ -39,7 +40,7 @@ static struct proc_dir_entry *root_irq_dir; | |||
39 | static int show_irq_affinity(int type, struct seq_file *m, void *v) | 40 | static int show_irq_affinity(int type, struct seq_file *m, void *v) |
40 | { | 41 | { |
41 | struct irq_desc *desc = irq_to_desc((long)m->private); | 42 | struct irq_desc *desc = irq_to_desc((long)m->private); |
42 | const struct cpumask *mask = desc->irq_data.affinity; | 43 | const struct cpumask *mask = desc->irq_common_data.affinity; |
43 | 44 | ||
44 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 45 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
45 | if (irqd_is_setaffinity_pending(&desc->irq_data)) | 46 | if (irqd_is_setaffinity_pending(&desc->irq_data)) |
@@ -323,18 +324,29 @@ void register_handler_proc(unsigned int irq, struct irqaction *action) | |||
323 | 324 | ||
324 | void register_irq_proc(unsigned int irq, struct irq_desc *desc) | 325 | void register_irq_proc(unsigned int irq, struct irq_desc *desc) |
325 | { | 326 | { |
327 | static DEFINE_MUTEX(register_lock); | ||
326 | char name [MAX_NAMELEN]; | 328 | char name [MAX_NAMELEN]; |
327 | 329 | ||
328 | if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip) || desc->dir) | 330 | if (!root_irq_dir || (desc->irq_data.chip == &no_irq_chip)) |
329 | return; | 331 | return; |
330 | 332 | ||
333 | /* | ||
334 | * irq directories are registered only when a handler is | ||
335 | * added, not when the descriptor is created, so multiple | ||
336 | * tasks might try to register at the same time. | ||
337 | */ | ||
338 | mutex_lock(®ister_lock); | ||
339 | |||
340 | if (desc->dir) | ||
341 | goto out_unlock; | ||
342 | |||
331 | memset(name, 0, MAX_NAMELEN); | 343 | memset(name, 0, MAX_NAMELEN); |
332 | sprintf(name, "%d", irq); | 344 | sprintf(name, "%d", irq); |
333 | 345 | ||
334 | /* create /proc/irq/1234 */ | 346 | /* create /proc/irq/1234 */ |
335 | desc->dir = proc_mkdir(name, root_irq_dir); | 347 | desc->dir = proc_mkdir(name, root_irq_dir); |
336 | if (!desc->dir) | 348 | if (!desc->dir) |
337 | return; | 349 | goto out_unlock; |
338 | 350 | ||
339 | #ifdef CONFIG_SMP | 351 | #ifdef CONFIG_SMP |
340 | /* create /proc/irq/<irq>/smp_affinity */ | 352 | /* create /proc/irq/<irq>/smp_affinity */ |
@@ -355,6 +367,9 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc) | |||
355 | 367 | ||
356 | proc_create_data("spurious", 0444, desc->dir, | 368 | proc_create_data("spurious", 0444, desc->dir, |
357 | &irq_spurious_proc_fops, (void *)(long)irq); | 369 | &irq_spurious_proc_fops, (void *)(long)irq); |
370 | |||
371 | out_unlock: | ||
372 | mutex_unlock(®ister_lock); | ||
358 | } | 373 | } |
359 | 374 | ||
360 | void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) | 375 | void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) |
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index dd95f44f99b2..b86886beee4f 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -38,7 +38,7 @@ static void resend_irqs(unsigned long arg) | |||
38 | clear_bit(irq, irqs_resend); | 38 | clear_bit(irq, irqs_resend); |
39 | desc = irq_to_desc(irq); | 39 | desc = irq_to_desc(irq); |
40 | local_irq_disable(); | 40 | local_irq_disable(); |
41 | desc->handle_irq(irq, desc); | 41 | desc->handle_irq(desc); |
42 | local_irq_enable(); | 42 | local_irq_enable(); |
43 | } | 43 | } |
44 | } | 44 | } |
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 8acfbf773e06..4e49cc4c9952 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -3068,7 +3068,7 @@ static int __lock_is_held(struct lockdep_map *lock); | |||
3068 | static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | 3068 | static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, |
3069 | int trylock, int read, int check, int hardirqs_off, | 3069 | int trylock, int read, int check, int hardirqs_off, |
3070 | struct lockdep_map *nest_lock, unsigned long ip, | 3070 | struct lockdep_map *nest_lock, unsigned long ip, |
3071 | int references) | 3071 | int references, int pin_count) |
3072 | { | 3072 | { |
3073 | struct task_struct *curr = current; | 3073 | struct task_struct *curr = current; |
3074 | struct lock_class *class = NULL; | 3074 | struct lock_class *class = NULL; |
@@ -3157,7 +3157,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
3157 | hlock->waittime_stamp = 0; | 3157 | hlock->waittime_stamp = 0; |
3158 | hlock->holdtime_stamp = lockstat_clock(); | 3158 | hlock->holdtime_stamp = lockstat_clock(); |
3159 | #endif | 3159 | #endif |
3160 | hlock->pin_count = 0; | 3160 | hlock->pin_count = pin_count; |
3161 | 3161 | ||
3162 | if (check && !mark_irqflags(curr, hlock)) | 3162 | if (check && !mark_irqflags(curr, hlock)) |
3163 | return 0; | 3163 | return 0; |
@@ -3343,7 +3343,7 @@ found_it: | |||
3343 | hlock_class(hlock)->subclass, hlock->trylock, | 3343 | hlock_class(hlock)->subclass, hlock->trylock, |
3344 | hlock->read, hlock->check, hlock->hardirqs_off, | 3344 | hlock->read, hlock->check, hlock->hardirqs_off, |
3345 | hlock->nest_lock, hlock->acquire_ip, | 3345 | hlock->nest_lock, hlock->acquire_ip, |
3346 | hlock->references)) | 3346 | hlock->references, hlock->pin_count)) |
3347 | return 0; | 3347 | return 0; |
3348 | } | 3348 | } |
3349 | 3349 | ||
@@ -3433,7 +3433,7 @@ found_it: | |||
3433 | hlock_class(hlock)->subclass, hlock->trylock, | 3433 | hlock_class(hlock)->subclass, hlock->trylock, |
3434 | hlock->read, hlock->check, hlock->hardirqs_off, | 3434 | hlock->read, hlock->check, hlock->hardirqs_off, |
3435 | hlock->nest_lock, hlock->acquire_ip, | 3435 | hlock->nest_lock, hlock->acquire_ip, |
3436 | hlock->references)) | 3436 | hlock->references, hlock->pin_count)) |
3437 | return 0; | 3437 | return 0; |
3438 | } | 3438 | } |
3439 | 3439 | ||
@@ -3583,7 +3583,7 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
3583 | current->lockdep_recursion = 1; | 3583 | current->lockdep_recursion = 1; |
3584 | trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); | 3584 | trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); |
3585 | __lock_acquire(lock, subclass, trylock, read, check, | 3585 | __lock_acquire(lock, subclass, trylock, read, check, |
3586 | irqs_disabled_flags(flags), nest_lock, ip, 0); | 3586 | irqs_disabled_flags(flags), nest_lock, ip, 0, 0); |
3587 | current->lockdep_recursion = 0; | 3587 | current->lockdep_recursion = 0; |
3588 | raw_local_irq_restore(flags); | 3588 | raw_local_irq_restore(flags); |
3589 | } | 3589 | } |
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 337c8818541d..87e9ce6a63c5 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -289,7 +289,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) | |||
289 | if (pv_enabled()) | 289 | if (pv_enabled()) |
290 | goto queue; | 290 | goto queue; |
291 | 291 | ||
292 | if (virt_queued_spin_lock(lock)) | 292 | if (virt_spin_lock(lock)) |
293 | return; | 293 | return; |
294 | 294 | ||
295 | /* | 295 | /* |
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 9f75f25cc5d9..775d36cc0050 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3868,6 +3868,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf) | |||
3868 | static void __init | 3868 | static void __init |
3869 | rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | 3869 | rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) |
3870 | { | 3870 | { |
3871 | static struct lock_class_key rcu_exp_sched_rdp_class; | ||
3871 | unsigned long flags; | 3872 | unsigned long flags; |
3872 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 3873 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); |
3873 | struct rcu_node *rnp = rcu_get_root(rsp); | 3874 | struct rcu_node *rnp = rcu_get_root(rsp); |
@@ -3883,6 +3884,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
3883 | mutex_init(&rdp->exp_funnel_mutex); | 3884 | mutex_init(&rdp->exp_funnel_mutex); |
3884 | rcu_boot_init_nocb_percpu_data(rdp); | 3885 | rcu_boot_init_nocb_percpu_data(rdp); |
3885 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 3886 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
3887 | if (rsp == &rcu_sched_state) | ||
3888 | lockdep_set_class_and_name(&rdp->exp_funnel_mutex, | ||
3889 | &rcu_exp_sched_rdp_class, | ||
3890 | "rcu_data_exp_sched"); | ||
3886 | } | 3891 | } |
3887 | 3892 | ||
3888 | /* | 3893 | /* |
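
The rcu/tree.c hunk gives the rcu_sched copy of the per-CPU exp_funnel_mutex its own lock_class_key and name, so lockdep can tell it apart from the same mutex embedded in the other RCU flavours' rcu_data, all of which are initialized at the same mutex_init() site. The idiom is general: when one lock type has instances that legitimately nest or need distinct reporting, the special instance gets a dedicated key. A hypothetical example, not taken from this diff:

    #include <linux/lockdep.h>
    #include <linux/mutex.h>

    struct my_dev {
            struct mutex lock;
    };

    /* A dedicated key gives this particular instance its own lockdep class. */
    static struct lock_class_key my_dev_parent_lock_key;

    static void my_dev_init_parent(struct my_dev *dev)
    {
            mutex_init(&dev->lock);
            lockdep_set_class_and_name(&dev->lock, &my_dev_parent_lock_key,
                                       "my_dev_parent_lock");
    }
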
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3595403921bd..10a8faa1b0d4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -621,18 +621,21 @@ int get_nohz_timer_target(void) | |||
621 | int i, cpu = smp_processor_id(); | 621 | int i, cpu = smp_processor_id(); |
622 | struct sched_domain *sd; | 622 | struct sched_domain *sd; |
623 | 623 | ||
624 | if (!idle_cpu(cpu)) | 624 | if (!idle_cpu(cpu) && is_housekeeping_cpu(cpu)) |
625 | return cpu; | 625 | return cpu; |
626 | 626 | ||
627 | rcu_read_lock(); | 627 | rcu_read_lock(); |
628 | for_each_domain(cpu, sd) { | 628 | for_each_domain(cpu, sd) { |
629 | for_each_cpu(i, sched_domain_span(sd)) { | 629 | for_each_cpu(i, sched_domain_span(sd)) { |
630 | if (!idle_cpu(i)) { | 630 | if (!idle_cpu(i) && is_housekeeping_cpu(cpu)) { |
631 | cpu = i; | 631 | cpu = i; |
632 | goto unlock; | 632 | goto unlock; |
633 | } | 633 | } |
634 | } | 634 | } |
635 | } | 635 | } |
636 | |||
637 | if (!is_housekeeping_cpu(cpu)) | ||
638 | cpu = housekeeping_any_cpu(); | ||
636 | unlock: | 639 | unlock: |
637 | rcu_read_unlock(); | 640 | rcu_read_unlock(); |
638 | return cpu; | 641 | return cpu; |
@@ -2514,11 +2517,11 @@ static struct rq *finish_task_switch(struct task_struct *prev) | |||
2514 | * If a task dies, then it sets TASK_DEAD in tsk->state and calls | 2517 | * If a task dies, then it sets TASK_DEAD in tsk->state and calls |
2515 | * schedule one last time. The schedule call will never return, and | 2518 | * schedule one last time. The schedule call will never return, and |
2516 | * the scheduled task must drop that reference. | 2519 | * the scheduled task must drop that reference. |
2517 | * The test for TASK_DEAD must occur while the runqueue locks are | 2520 | * |
2518 | * still held, otherwise prev could be scheduled on another cpu, die | 2521 | * We must observe prev->state before clearing prev->on_cpu (in |
2519 | * there before we look at prev->state, and then the reference would | 2522 | * finish_lock_switch), otherwise a concurrent wakeup can get prev |
2520 | * be dropped twice. | 2523 | * running on another CPU and we could rave with its RUNNING -> DEAD |
2521 | * Manfred Spraul <manfred@colorfullife.com> | 2524 | * transition, resulting in a double drop. |
2522 | */ | 2525 | */ |
2523 | prev_state = prev->state; | 2526 | prev_state = prev->state; |
2524 | vtime_task_switch(prev); | 2527 | vtime_task_switch(prev); |
@@ -2666,13 +2669,20 @@ unsigned long nr_running(void) | |||
2666 | 2669 | ||
2667 | /* | 2670 | /* |
2668 | * Check if only the current task is running on the cpu. | 2671 | * Check if only the current task is running on the cpu. |
2672 | * | ||
2673 | * Caution: this function does not check that the caller has disabled | ||
2674 | * preemption, thus the result might have a time-of-check-to-time-of-use | ||
2675 | * race. The caller is responsible to use it correctly, for example: | ||
2676 | * | ||
2677 | * - from a non-preemptable section (of course) | ||
2678 | * | ||
2679 | * - from a thread that is bound to a single CPU | ||
2680 | * | ||
2681 | * - in a loop with very short iterations (e.g. a polling loop) | ||
2669 | */ | 2682 | */ |
2670 | bool single_task_running(void) | 2683 | bool single_task_running(void) |
2671 | { | 2684 | { |
2672 | if (cpu_rq(smp_processor_id())->nr_running == 1) | 2685 | return raw_rq()->nr_running == 1; |
2673 | return true; | ||
2674 | else | ||
2675 | return false; | ||
2676 | } | 2686 | } |
2677 | EXPORT_SYMBOL(single_task_running); | 2687 | EXPORT_SYMBOL(single_task_running); |
2678 | 2688 | ||
@@ -4924,7 +4934,15 @@ void init_idle(struct task_struct *idle, int cpu) | |||
4924 | idle->state = TASK_RUNNING; | 4934 | idle->state = TASK_RUNNING; |
4925 | idle->se.exec_start = sched_clock(); | 4935 | idle->se.exec_start = sched_clock(); |
4926 | 4936 | ||
4927 | do_set_cpus_allowed(idle, cpumask_of(cpu)); | 4937 | #ifdef CONFIG_SMP |
4938 | /* | ||
4939 | * Its possible that init_idle() gets called multiple times on a task, | ||
4940 | * in that case do_set_cpus_allowed() will not do the right thing. | ||
4941 | * | ||
4942 | * And since this is boot we can forgo the serialization. | ||
4943 | */ | ||
4944 | set_cpus_allowed_common(idle, cpumask_of(cpu)); | ||
4945 | #endif | ||
4928 | /* | 4946 | /* |
4929 | * We're having a chicken and egg problem, even though we are | 4947 | * We're having a chicken and egg problem, even though we are |
4930 | * holding rq->lock, the cpu isn't yet set to this cpu so the | 4948 | * holding rq->lock, the cpu isn't yet set to this cpu so the |
@@ -4941,7 +4959,7 @@ void init_idle(struct task_struct *idle, int cpu) | |||
4941 | 4959 | ||
4942 | rq->curr = rq->idle = idle; | 4960 | rq->curr = rq->idle = idle; |
4943 | idle->on_rq = TASK_ON_RQ_QUEUED; | 4961 | idle->on_rq = TASK_ON_RQ_QUEUED; |
4944 | #if defined(CONFIG_SMP) | 4962 | #ifdef CONFIG_SMP |
4945 | idle->on_cpu = 1; | 4963 | idle->on_cpu = 1; |
4946 | #endif | 4964 | #endif |
4947 | raw_spin_unlock(&rq->lock); | 4965 | raw_spin_unlock(&rq->lock); |
@@ -4956,7 +4974,7 @@ void init_idle(struct task_struct *idle, int cpu) | |||
4956 | idle->sched_class = &idle_sched_class; | 4974 | idle->sched_class = &idle_sched_class; |
4957 | ftrace_graph_init_idle_task(idle, cpu); | 4975 | ftrace_graph_init_idle_task(idle, cpu); |
4958 | vtime_init_idle(idle, cpu); | 4976 | vtime_init_idle(idle, cpu); |
4959 | #if defined(CONFIG_SMP) | 4977 | #ifdef CONFIG_SMP |
4960 | sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); | 4978 | sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); |
4961 | #endif | 4979 | #endif |
4962 | } | 4980 | } |
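init_idle() now bypasses do_set_cpus_allowed(), which would dequeue/requeue an already-initialized idle task and assert locks that boot does not hold. For reference, set_cpus_allowed_common(), introduced earlier in this series, is essentially just the mask update with no sched-class side effects (sketch from memory; see kernel/sched/core.c for the authoritative version):

    /* Essentially what set_cpus_allowed_common() does: update the
     * allowed mask and its weight, nothing else. */
    void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask)
    {
    	cpumask_copy(&p->cpus_allowed, new_mask);
    	p->nr_cpus_allowed = cpumask_weight(new_mask);
    }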
@@ -5178,24 +5196,47 @@ static void migrate_tasks(struct rq *dead_rq) | |||
5178 | break; | 5196 | break; |
5179 | 5197 | ||
5180 | /* | 5198 | /* |
5181 | * Ensure rq->lock covers the entire task selection | 5199 | * pick_next_task assumes pinned rq->lock. |
5182 | * until the migration. | ||
5183 | */ | 5200 | */ |
5184 | lockdep_pin_lock(&rq->lock); | 5201 | lockdep_pin_lock(&rq->lock); |
5185 | next = pick_next_task(rq, &fake_task); | 5202 | next = pick_next_task(rq, &fake_task); |
5186 | BUG_ON(!next); | 5203 | BUG_ON(!next); |
5187 | next->sched_class->put_prev_task(rq, next); | 5204 | next->sched_class->put_prev_task(rq, next); |
5188 | 5205 | ||
5206 | /* | ||
5207 | * Rules for changing task_struct::cpus_allowed are holding | ||
5208 | * both pi_lock and rq->lock, such that holding either | ||
5209 | * stabilizes the mask. | ||
5210 | * | ||
5211 | * Dropping rq->lock is not quite as disastrous as it usually is | ||
5212 | * because !cpu_active at this point, which means load-balance | ||
5213 | * will not interfere. Also, stop-machine. | ||
5214 | */ | ||
5215 | lockdep_unpin_lock(&rq->lock); | ||
5216 | raw_spin_unlock(&rq->lock); | ||
5217 | raw_spin_lock(&next->pi_lock); | ||
5218 | raw_spin_lock(&rq->lock); | ||
5219 | |||
5220 | /* | ||
5221 | * Since we're inside stop-machine, _nothing_ should have | ||
5222 | * changed the task, WARN if weird stuff happened, because in | ||
5223 | * that case the above rq->lock drop is a fail too. | ||
5224 | */ | ||
5225 | if (WARN_ON(task_rq(next) != rq || !task_on_rq_queued(next))) { | ||
5226 | raw_spin_unlock(&next->pi_lock); | ||
5227 | continue; | ||
5228 | } | ||
5229 | |||
5189 | /* Find suitable destination for @next, with force if needed. */ | 5230 | /* Find suitable destination for @next, with force if needed. */ |
5190 | dest_cpu = select_fallback_rq(dead_rq->cpu, next); | 5231 | dest_cpu = select_fallback_rq(dead_rq->cpu, next); |
5191 | 5232 | ||
5192 | lockdep_unpin_lock(&rq->lock); | ||
5193 | rq = __migrate_task(rq, next, dest_cpu); | 5233 | rq = __migrate_task(rq, next, dest_cpu); |
5194 | if (rq != dead_rq) { | 5234 | if (rq != dead_rq) { |
5195 | raw_spin_unlock(&rq->lock); | 5235 | raw_spin_unlock(&rq->lock); |
5196 | rq = dead_rq; | 5236 | rq = dead_rq; |
5197 | raw_spin_lock(&rq->lock); | 5237 | raw_spin_lock(&rq->lock); |
5198 | } | 5238 | } |
5239 | raw_spin_unlock(&next->pi_lock); | ||
5199 | } | 5240 | } |
5200 | 5241 | ||
5201 | rq->stop = stop; | 5242 | rq->stop = stop; |
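The stabilization rule quoted in the new comment is what makes the unlock/relock dance above safe: writers of task_struct::cpus_allowed must hold both p->pi_lock and rq->lock, so a reader holding either one sees a stable mask. An illustrative reader (hypothetical helper, not from this patch):

    /* Hypothetical reader relying on the rule in the comment above:
     * holding either p->pi_lock or rq->lock stabilizes cpus_allowed,
     * because writers are required to take both. */
    static int peek_allowed_cpu(struct rq *rq, struct task_struct *p)
    {
    	lockdep_assert_held(&rq->lock);
    	return cpumask_first(&p->cpus_allowed);
    }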
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 68cda117574c..6d2a119c7ad9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
@@ -1078,9 +1078,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
1078 | * After ->on_cpu is cleared, the task can be moved to a different CPU. | 1078 | * After ->on_cpu is cleared, the task can be moved to a different CPU. |
1079 | * We must ensure this doesn't happen until the switch is completely | 1079 | * We must ensure this doesn't happen until the switch is completely |
1080 | * finished. | 1080 | * finished. |
1081 | * | ||
1082 | * Pairs with the control dependency and rmb in try_to_wake_up(). | ||
1081 | */ | 1083 | */ |
1082 | smp_wmb(); | 1084 | smp_store_release(&prev->on_cpu, 0); |
1083 | prev->on_cpu = 0; | ||
1084 | #endif | 1085 | #endif |
1085 | #ifdef CONFIG_DEBUG_SPINLOCK | 1086 | #ifdef CONFIG_DEBUG_SPINLOCK |
1086 | /* this is a valid case when another task releases the spinlock */ | 1087 | /* this is a valid case when another task releases the spinlock */ |
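The smp_wmb() plus plain store becomes a single smp_store_release(), giving the waker a well-defined point to pair against. A condensed sketch of the pairing the two comments describe; the try_to_wake_up() side shown here is illustrative rather than the exact code in that function:

    /* Switch-out side (finish_lock_switch above): every access made while
     * this CPU still owned prev, including the prev->state read in
     * finish_task_switch(), is ordered before the release store. */
    smp_store_release(&prev->on_cpu, 0);

    /* Wake-up side (try_to_wake_up, illustrative): spin until the release
     * store is visible, then order subsequent accesses after it via the
     * control dependency plus read barrier. */
    while (READ_ONCE(p->on_cpu))
    	cpu_relax();
    smp_rmb();

This closes the window where a concurrent wakeup could migrate a TASK_DEAD task before its final state was observed, which is the double-drop described in the core.c comment above.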
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 272d9322bc5d..052e02672d12 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c | |||
@@ -106,10 +106,9 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr) | |||
106 | } | 106 | } |
107 | EXPORT_SYMBOL_GPL(__wake_up_locked); | 107 | EXPORT_SYMBOL_GPL(__wake_up_locked); |
108 | 108 | ||
109 | void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, int nr, | 109 | void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key) |
110 | void *key) | ||
111 | { | 110 | { |
112 | __wake_up_common(q, mode, nr, 0, key); | 111 | __wake_up_common(q, mode, 1, 0, key); |
113 | } | 112 | } |
114 | EXPORT_SYMBOL_GPL(__wake_up_locked_key); | 113 | EXPORT_SYMBOL_GPL(__wake_up_locked_key); |
115 | 114 | ||
@@ -284,7 +283,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, | |||
284 | if (!list_empty(&wait->task_list)) | 283 | if (!list_empty(&wait->task_list)) |
285 | list_del_init(&wait->task_list); | 284 | list_del_init(&wait->task_list); |
286 | else if (waitqueue_active(q)) | 285 | else if (waitqueue_active(q)) |
287 | __wake_up_locked_key(q, mode, 1, key); | 286 | __wake_up_locked_key(q, mode, key); |
288 | spin_unlock_irqrestore(&q->lock, flags); | 287 | spin_unlock_irqrestore(&q->lock, flags); |
289 | } | 288 | } |
290 | EXPORT_SYMBOL(abort_exclusive_wait); | 289 | EXPORT_SYMBOL(abort_exclusive_wait); |
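__wake_up_locked_key() loses its nr_exclusive parameter and now always wakes a single exclusive waiter; abort_exclusive_wait() above shows the call-site conversion. Any other caller would change the same way, for example (hypothetical caller):

    /* Hypothetical caller showing the call-site conversion. */
    static void foo_notify(wait_queue_head_t *q, void *key)
    {
    	unsigned long flags;

    	spin_lock_irqsave(&q->lock, flags);
    	/* old: __wake_up_locked_key(q, TASK_NORMAL, 1, key); */
    	__wake_up_locked_key(q, TASK_NORMAL, key);
    	spin_unlock_irqrestore(&q->lock, flags);
    }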
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 50eb107f1198..a9b76a40319e 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c | |||
@@ -97,20 +97,6 @@ EXPORT_SYMBOL_GPL(clockevent_delta2ns); | |||
97 | static int __clockevents_switch_state(struct clock_event_device *dev, | 97 | static int __clockevents_switch_state(struct clock_event_device *dev, |
98 | enum clock_event_state state) | 98 | enum clock_event_state state) |
99 | { | 99 | { |
100 | /* Transition with legacy set_mode() callback */ | ||
101 | if (dev->set_mode) { | ||
102 | /* Legacy callback doesn't support new modes */ | ||
103 | if (state > CLOCK_EVT_STATE_ONESHOT) | ||
104 | return -ENOSYS; | ||
105 | /* | ||
106 | * 'clock_event_state' and 'clock_event_mode' have 1-to-1 | ||
107 | * mapping until *_ONESHOT, and so a simple cast will work. | ||
108 | */ | ||
109 | dev->set_mode((enum clock_event_mode)state, dev); | ||
110 | dev->mode = (enum clock_event_mode)state; | ||
111 | return 0; | ||
112 | } | ||
113 | |||
114 | if (dev->features & CLOCK_EVT_FEAT_DUMMY) | 100 | if (dev->features & CLOCK_EVT_FEAT_DUMMY) |
115 | return 0; | 101 | return 0; |
116 | 102 | ||
@@ -204,12 +190,8 @@ int clockevents_tick_resume(struct clock_event_device *dev) | |||
204 | { | 190 | { |
205 | int ret = 0; | 191 | int ret = 0; |
206 | 192 | ||
207 | if (dev->set_mode) { | 193 | if (dev->tick_resume) |
208 | dev->set_mode(CLOCK_EVT_MODE_RESUME, dev); | ||
209 | dev->mode = CLOCK_EVT_MODE_RESUME; | ||
210 | } else if (dev->tick_resume) { | ||
211 | ret = dev->tick_resume(dev); | 194 | ret = dev->tick_resume(dev); |
212 | } | ||
213 | 195 | ||
214 | return ret; | 196 | return ret; |
215 | } | 197 | } |
@@ -460,26 +442,6 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu) | |||
460 | } | 442 | } |
461 | EXPORT_SYMBOL_GPL(clockevents_unbind_device); | 443 | EXPORT_SYMBOL_GPL(clockevents_unbind_device); |
462 | 444 | ||
463 | /* Sanity check of state transition callbacks */ | ||
464 | static int clockevents_sanity_check(struct clock_event_device *dev) | ||
465 | { | ||
466 | /* Legacy set_mode() callback */ | ||
467 | if (dev->set_mode) { | ||
468 | /* We shouldn't be supporting new modes now */ | ||
469 | WARN_ON(dev->set_state_periodic || dev->set_state_oneshot || | ||
470 | dev->set_state_shutdown || dev->tick_resume || | ||
471 | dev->set_state_oneshot_stopped); | ||
472 | |||
473 | BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); | ||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | if (dev->features & CLOCK_EVT_FEAT_DUMMY) | ||
478 | return 0; | ||
479 | |||
480 | return 0; | ||
481 | } | ||
482 | |||
483 | /** | 445 | /** |
484 | * clockevents_register_device - register a clock event device | 446 | * clockevents_register_device - register a clock event device |
485 | * @dev: device to register | 447 | * @dev: device to register |
@@ -488,8 +450,6 @@ void clockevents_register_device(struct clock_event_device *dev) | |||
488 | { | 450 | { |
489 | unsigned long flags; | 451 | unsigned long flags; |
490 | 452 | ||
491 | BUG_ON(clockevents_sanity_check(dev)); | ||
492 | |||
493 | /* Initialize state to DETACHED */ | 453 | /* Initialize state to DETACHED */ |
494 | clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); | 454 | clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); |
495 | 455 | ||
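With the last legacy ->set_mode() handling gone from __clockevents_switch_state(), clockevents_tick_resume() and the registration-time sanity check, clock event drivers are expected to provide the per-state callbacks directly. A hedged sketch of what a converted driver's device description looks like; all names here are invented for illustration:

    /* Hypothetical driver converted from ->set_mode() to the
     * per-state callbacks this series now requires. */
    static int foo_timer_shutdown(struct clock_event_device *ced)
    {
    	/* stop the hardware counter */
    	return 0;
    }

    static int foo_timer_set_periodic(struct clock_event_device *ced)
    {
    	/* program the hardware for periodic interrupts */
    	return 0;
    }

    static struct clock_event_device foo_clockevent = {
    	.name			= "foo-timer",
    	.features		= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
    	.set_state_shutdown	= foo_timer_shutdown,
    	.set_state_periodic	= foo_timer_set_periodic,
    	/* .set_state_oneshot, .set_next_event, .tick_resume as needed */
    };

Registration itself is unchanged, e.g. via clockevents_config_and_register().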
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 841b72f720e8..3a38775b50c2 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c | |||
@@ -217,7 +217,7 @@ static void clocksource_watchdog(unsigned long data) | |||
217 | continue; | 217 | continue; |
218 | 218 | ||
219 | /* Check the deviation from the watchdog clocksource. */ | 219 | /* Check the deviation from the watchdog clocksource. */ |
220 | if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) { | 220 | if (abs64(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { |
221 | pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n", | 221 | pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable because the skew is too large:\n", |
222 | cs->name); | 222 | cs->name); |
223 | pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", | 223 | pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", |
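cs_nsec - wd_nsec is an s64, and the abs() macro of this era evaluates its argument in a 32-bit type when the argument does not fit in long, so on 32-bit builds a large skew could be truncated before the threshold comparison; abs64() keeps the full 64-bit magnitude. The timekeeping_freqadjust() hunk further down makes the same substitution for tick_error. A toy illustration with hypothetical values on a 32-bit build:

    /* Illustration only: a skew larger than 32 bits. */
    s64 skew = 0x180000000LL;	/* ~6.4 s in ns, does not fit in 32 bits */

    /* old: abs(skew) narrows the value before the WATCHDOG_THRESHOLD
     *      comparison, hiding the skew on 32-bit kernels.
     * new: abs64(skew) preserves the full 64-bit magnitude. */
    s64 skew_mag = abs64(skew);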
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index d11c55b6ab7d..4fcd99e12aa0 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c | |||
@@ -398,7 +398,6 @@ void tick_shutdown(unsigned int cpu) | |||
398 | * the set mode function! | 398 | * the set mode function! |
399 | */ | 399 | */ |
400 | clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); | 400 | clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED); |
401 | dev->mode = CLOCK_EVT_MODE_UNUSED; | ||
402 | clockevents_exchange_device(dev, NULL); | 401 | clockevents_exchange_device(dev, NULL); |
403 | dev->event_handler = clockevents_handle_noop; | 402 | dev->event_handler = clockevents_handle_noop; |
404 | td->evtdev = NULL; | 403 | td->evtdev = NULL; |
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3319e16f31e5..7c7ec4515983 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c | |||
@@ -290,16 +290,17 @@ static int __init tick_nohz_full_setup(char *str) | |||
290 | __setup("nohz_full=", tick_nohz_full_setup); | 290 | __setup("nohz_full=", tick_nohz_full_setup); |
291 | 291 | ||
292 | static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, | 292 | static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, |
293 | unsigned long action, | 293 | unsigned long action, |
294 | void *hcpu) | 294 | void *hcpu) |
295 | { | 295 | { |
296 | unsigned int cpu = (unsigned long)hcpu; | 296 | unsigned int cpu = (unsigned long)hcpu; |
297 | 297 | ||
298 | switch (action & ~CPU_TASKS_FROZEN) { | 298 | switch (action & ~CPU_TASKS_FROZEN) { |
299 | case CPU_DOWN_PREPARE: | 299 | case CPU_DOWN_PREPARE: |
300 | /* | 300 | /* |
301 | * If we handle the timekeeping duty for full dynticks CPUs, | 301 | * The boot CPU handles housekeeping duty (unbound timers, |
302 | * we can't safely shutdown that CPU. | 302 | * workqueues, timekeeping, ...) on behalf of full dynticks |
303 | * CPUs. It must remain online when nohz full is enabled. | ||
303 | */ | 304 | */ |
304 | if (tick_nohz_full_running && tick_do_timer_cpu == cpu) | 305 | if (tick_nohz_full_running && tick_do_timer_cpu == cpu) |
305 | return NOTIFY_BAD; | 306 | return NOTIFY_BAD; |
@@ -370,6 +371,12 @@ void __init tick_nohz_init(void) | |||
370 | cpu_notifier(tick_nohz_cpu_down_callback, 0); | 371 | cpu_notifier(tick_nohz_cpu_down_callback, 0); |
371 | pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", | 372 | pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", |
372 | cpumask_pr_args(tick_nohz_full_mask)); | 373 | cpumask_pr_args(tick_nohz_full_mask)); |
374 | |||
375 | /* | ||
376 | * We need at least one CPU to handle housekeeping work such | ||
377 | * as timekeeping, unbound timers, workqueues, ... | ||
378 | */ | ||
379 | WARN_ON_ONCE(cpumask_empty(housekeeping_mask)); | ||
373 | } | 380 | } |
374 | #endif | 381 | #endif |
375 | 382 | ||
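The WARN_ON_ONCE() documents the invariant that housekeeping_mask must never end up empty: some CPU has to carry timekeeping, unbound timers and workqueues for the full-dynticks set. In practice the boot CPU is kept out of that set, so, as an illustrative configuration, booting an 8-CPU machine with "nohz_full=1-7" leaves CPU 0 in housekeeping_mask and the warning stays silent.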
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index f6ee2e6b6f5d..44d2cc0436f4 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c | |||
@@ -1251,7 +1251,7 @@ void __init timekeeping_init(void) | |||
1251 | set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec); | 1251 | set_normalized_timespec64(&tmp, -boot.tv_sec, -boot.tv_nsec); |
1252 | tk_set_wall_to_mono(tk, tmp); | 1252 | tk_set_wall_to_mono(tk, tmp); |
1253 | 1253 | ||
1254 | timekeeping_update(tk, TK_MIRROR); | 1254 | timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); |
1255 | 1255 | ||
1256 | write_seqcount_end(&tk_core.seq); | 1256 | write_seqcount_end(&tk_core.seq); |
1257 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); | 1257 | raw_spin_unlock_irqrestore(&timekeeper_lock, flags); |
@@ -1614,7 +1614,7 @@ static __always_inline void timekeeping_freqadjust(struct timekeeper *tk, | |||
1614 | negative = (tick_error < 0); | 1614 | negative = (tick_error < 0); |
1615 | 1615 | ||
1616 | /* Sort out the magnitude of the correction */ | 1616 | /* Sort out the magnitude of the correction */ |
1617 | tick_error = abs(tick_error); | 1617 | tick_error = abs64(tick_error); |
1618 | for (adj = 0; tick_error > interval; adj++) | 1618 | for (adj = 0; tick_error > interval; adj++) |
1619 | tick_error >>= 1; | 1619 | tick_error >>= 1; |
1620 | 1620 | ||
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 129c96033e46..f75e35b60149 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c | |||
@@ -225,7 +225,7 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) | |||
225 | (unsigned long long) dev->min_delta_ns); | 225 | (unsigned long long) dev->min_delta_ns); |
226 | SEQ_printf(m, " mult: %u\n", dev->mult); | 226 | SEQ_printf(m, " mult: %u\n", dev->mult); |
227 | SEQ_printf(m, " shift: %u\n", dev->shift); | 227 | SEQ_printf(m, " shift: %u\n", dev->shift); |
228 | SEQ_printf(m, " mode: %d\n", dev->mode); | 228 | SEQ_printf(m, " mode: %d\n", clockevent_get_state(dev)); |
229 | SEQ_printf(m, " next_event: %Ld nsecs\n", | 229 | SEQ_printf(m, " next_event: %Ld nsecs\n", |
230 | (unsigned long long) ktime_to_ns(dev->next_event)); | 230 | (unsigned long long) ktime_to_ns(dev->next_event)); |
231 | 231 | ||
@@ -233,40 +233,34 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu) | |||
233 | print_name_offset(m, dev->set_next_event); | 233 | print_name_offset(m, dev->set_next_event); |
234 | SEQ_printf(m, "\n"); | 234 | SEQ_printf(m, "\n"); |
235 | 235 | ||
236 | if (dev->set_mode) { | 236 | if (dev->set_state_shutdown) { |
237 | SEQ_printf(m, " set_mode: "); | 237 | SEQ_printf(m, " shutdown: "); |
238 | print_name_offset(m, dev->set_mode); | 238 | print_name_offset(m, dev->set_state_shutdown); |
239 | SEQ_printf(m, "\n"); | 239 | SEQ_printf(m, "\n"); |
240 | } else { | 240 | } |
241 | if (dev->set_state_shutdown) { | ||
242 | SEQ_printf(m, " shutdown: "); | ||
243 | print_name_offset(m, dev->set_state_shutdown); | ||
244 | SEQ_printf(m, "\n"); | ||
245 | } | ||
246 | 241 | ||
247 | if (dev->set_state_periodic) { | 242 | if (dev->set_state_periodic) { |
248 | SEQ_printf(m, " periodic: "); | 243 | SEQ_printf(m, " periodic: "); |
249 | print_name_offset(m, dev->set_state_periodic); | 244 | print_name_offset(m, dev->set_state_periodic); |
250 | SEQ_printf(m, "\n"); | 245 | SEQ_printf(m, "\n"); |
251 | } | 246 | } |
252 | 247 | ||
253 | if (dev->set_state_oneshot) { | 248 | if (dev->set_state_oneshot) { |
254 | SEQ_printf(m, " oneshot: "); | 249 | SEQ_printf(m, " oneshot: "); |
255 | print_name_offset(m, dev->set_state_oneshot); | 250 | print_name_offset(m, dev->set_state_oneshot); |
256 | SEQ_printf(m, "\n"); | 251 | SEQ_printf(m, "\n"); |
257 | } | 252 | } |
258 | 253 | ||
259 | if (dev->set_state_oneshot_stopped) { | 254 | if (dev->set_state_oneshot_stopped) { |
260 | SEQ_printf(m, " oneshot stopped: "); | 255 | SEQ_printf(m, " oneshot stopped: "); |
261 | print_name_offset(m, dev->set_state_oneshot_stopped); | 256 | print_name_offset(m, dev->set_state_oneshot_stopped); |
262 | SEQ_printf(m, "\n"); | 257 | SEQ_printf(m, "\n"); |
263 | } | 258 | } |
264 | 259 | ||
265 | if (dev->tick_resume) { | 260 | if (dev->tick_resume) { |
266 | SEQ_printf(m, " resume: "); | 261 | SEQ_printf(m, " resume: "); |
267 | print_name_offset(m, dev->tick_resume); | 262 | print_name_offset(m, dev->tick_resume); |
268 | SEQ_printf(m, "\n"); | 263 | SEQ_printf(m, "\n"); |
269 | } | ||
270 | } | 264 | } |
271 | 265 | ||
272 | SEQ_printf(m, " event_handler: "); | 266 | SEQ_printf(m, " event_handler: "); |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ca71582fcfab..bcb14cafe007 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -1458,13 +1458,13 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, | |||
1458 | timer_stats_timer_set_start_info(&dwork->timer); | 1458 | timer_stats_timer_set_start_info(&dwork->timer); |
1459 | 1459 | ||
1460 | dwork->wq = wq; | 1460 | dwork->wq = wq; |
1461 | /* timer isn't guaranteed to run in this cpu, record earlier */ | ||
1462 | if (cpu == WORK_CPU_UNBOUND) | ||
1463 | cpu = raw_smp_processor_id(); | ||
1461 | dwork->cpu = cpu; | 1464 | dwork->cpu = cpu; |
1462 | timer->expires = jiffies + delay; | 1465 | timer->expires = jiffies + delay; |
1463 | 1466 | ||
1464 | if (unlikely(cpu != WORK_CPU_UNBOUND)) | 1467 | add_timer_on(timer, cpu); |
1465 | add_timer_on(timer, cpu); | ||
1466 | else | ||
1467 | add_timer(timer); | ||
1468 | } | 1468 | } |
1469 | 1469 | ||
1470 | /** | 1470 | /** |
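Recording dwork->cpu before the timer is armed matters because the timer callback is not guaranteed to fire on the queueing CPU, and dwork->cpu is what delayed_work_timer_fn() later hands to __queue_work(). Resolving WORK_CPU_UNBOUND to raw_smp_processor_id() up front lets add_timer_on() be used unconditionally. Nothing changes for callers, for example (hypothetical work item):

    /* Hypothetical user: the queue_delayed_work*() API is unaffected,
     * only where and how the backing timer gets armed changes. */
    static void foo_work_fn(struct work_struct *work)
    {
    	/* ... */
    }
    static DECLARE_DELAYED_WORK(foo_dwork, foo_work_fn);

    static void foo_kick(void)
    {
    	/* unbound: the queueing CPU is recorded for the timer */
    	queue_delayed_work(system_wq, &foo_dwork, HZ);
    }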