author      Al Viro <viro@zeniv.linux.org.uk>    2014-12-08 20:39:29 -0500
committer   Al Viro <viro@zeniv.linux.org.uk>    2014-12-08 20:39:29 -0500
commit      ba00410b8131b23edfb0e09f8b6dd26c8eb621fb (patch)
tree        c08504e4d2fa51ac91cef544f336d0169806c49f /kernel
parent      8ce74dd6057832618957fc2cbd38fa959c3a0a6c (diff)
parent      aa583096d9767892983332e7c1a984bd17e3cd39 (diff)
Merge branch 'iov_iter' into for-next
Diffstat (limited to 'kernel')
35 files changed, 410 insertions, 260 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index dc5c77544fd6..17ea6d4a9a24 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -86,7 +86,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_TRACEPOINTS) += trace/
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
-obj-$(CONFIG_NET) += bpf/
+obj-$(CONFIG_BPF) += bpf/
 
 obj-$(CONFIG_PERF_EVENTS) += events/
 
diff --git a/kernel/audit.c b/kernel/audit.c
index 80983df92cd4..cebb11db4d34 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -739,7 +739,7 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
 
         ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
         audit_log_task_info(ab, current);
-        audit_log_format(ab, "feature=%s old=%u new=%u old_lock=%u new_lock=%u res=%d",
+        audit_log_format(ab, " feature=%s old=%u new=%u old_lock=%u new_lock=%u res=%d",
                          audit_feature_names[which], !!old_feature, !!new_feature,
                          !!old_lock, !!new_lock, res);
         audit_log_end(ab);
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index e242e3a9864a..80f29e015570 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -154,6 +154,7 @@ static struct audit_chunk *alloc_chunk(int count)
                 chunk->owners[i].index = i;
         }
         fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch);
+        chunk->mark.mask = FS_IN_IGNORED;
         return chunk;
 }
 
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 45427239f375..0daf7f6ae7df 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,5 +1,5 @@
-obj-y := core.o syscall.o verifier.o
-
+obj-y := core.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o
 ifdef CONFIG_TEST_BPF
-obj-y += test_stub.o
+obj-$(CONFIG_BPF_SYSCALL) += test_stub.o
 endif
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index f0c30c59b317..d6594e457a25 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -655,3 +655,12 @@ void bpf_prog_free(struct bpf_prog *fp)
         schedule_work(&aux->work);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_free);
+
+/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
+ * skb_copy_bits(), so provide a weak definition of it for NET-less config.
+ */
+int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
+                         int len)
+{
+        return -EFAULT;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 801f5f3b9307..9f81818f2941 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1409,7 +1409,8 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur)
                 if (memcmp(&old->regs[i], &cur->regs[i],
                            sizeof(old->regs[0])) != 0) {
                         if (old->regs[i].type == NOT_INIT ||
-                            old->regs[i].type == UNKNOWN_VALUE)
+                            (old->regs[i].type == UNKNOWN_VALUE &&
+                             cur->regs[i].type != NOT_INIT))
                                 continue;
                         return false;
                 }
diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c
index 5664985c46a0..937ecdfdf258 100644
--- a/kernel/context_tracking.c
+++ b/kernel/context_tracking.c
@@ -107,46 +107,6 @@ void context_tracking_user_enter(void)
 }
 NOKPROBE_SYMBOL(context_tracking_user_enter);
 
-#ifdef CONFIG_PREEMPT
-/**
- * preempt_schedule_context - preempt_schedule called by tracing
- *
- * The tracing infrastructure uses preempt_enable_notrace to prevent
- * recursion and tracing preempt enabling caused by the tracing
- * infrastructure itself. But as tracing can happen in areas coming
- * from userspace or just about to enter userspace, a preempt enable
- * can occur before user_exit() is called. This will cause the scheduler
- * to be called when the system is still in usermode.
- *
- * To prevent this, the preempt_enable_notrace will use this function
- * instead of preempt_schedule() to exit user context if needed before
- * calling the scheduler.
- */
-asmlinkage __visible void __sched notrace preempt_schedule_context(void)
-{
-        enum ctx_state prev_ctx;
-
-        if (likely(!preemptible()))
-                return;
-
-        /*
-         * Need to disable preemption in case user_exit() is traced
-         * and the tracer calls preempt_enable_notrace() causing
-         * an infinite recursion.
-         */
-        preempt_disable_notrace();
-        prev_ctx = exception_enter();
-        preempt_enable_no_resched_notrace();
-
-        preempt_schedule();
-
-        preempt_disable_notrace();
-        exception_exit(prev_ctx);
-        preempt_enable_notrace();
-}
-EXPORT_SYMBOL_GPL(preempt_schedule_context);
-#endif /* CONFIG_PREEMPT */
-
 /**
  * context_tracking_user_exit - Inform the context tracking that the CPU is
  *                              exiting userspace mode and entering the kernel.
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 356450f09c1f..90a3d017b90c 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -64,6 +64,8 @@ static struct {
          * an ongoing cpu hotplug operation.
          */
         int refcount;
+        /* And allows lockless put_online_cpus(). */
+        atomic_t puts_pending;
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
         struct lockdep_map dep_map;
@@ -113,7 +115,11 @@ void put_online_cpus(void)
 {
         if (cpu_hotplug.active_writer == current)
                 return;
-        mutex_lock(&cpu_hotplug.lock);
+        if (!mutex_trylock(&cpu_hotplug.lock)) {
+                atomic_inc(&cpu_hotplug.puts_pending);
+                cpuhp_lock_release();
+                return;
+        }
 
         if (WARN_ON(!cpu_hotplug.refcount))
                 cpu_hotplug.refcount++; /* try to fix things up */
@@ -155,6 +161,12 @@ void cpu_hotplug_begin(void)
         cpuhp_lock_acquire();
         for (;;) {
                 mutex_lock(&cpu_hotplug.lock);
+                if (atomic_read(&cpu_hotplug.puts_pending)) {
+                        int delta;
+
+                        delta = atomic_xchg(&cpu_hotplug.puts_pending, 0);
+                        cpu_hotplug.refcount -= delta;
+                }
                 if (likely(!cpu_hotplug.refcount))
                         break;
                 __set_current_state(TASK_UNINTERRUPTIBLE);
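The put_online_cpus() change above avoids sleeping on cpu_hotplug.lock: if the mutex is contended, the put is recorded in an atomic counter and the hotplug writer later folds the pending puts into refcount while it holds the lock. A minimal user-space sketch of that pattern, with hypothetical names and pthread/C11 atomics standing in for the kernel primitives:

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int refcount;               /* protected by lock */
static atomic_int puts_pending;    /* lockless side channel */

/* Reader-side put: never blocks waiting for the writer. */
void put_ref(void)
{
        if (pthread_mutex_trylock(&lock) != 0) {
                atomic_fetch_add(&puts_pending, 1);  /* defer the decrement */
                return;
        }
        refcount--;
        pthread_mutex_unlock(&lock);
}

/* Writer side: drain the deferred puts before testing refcount. */
int all_readers_gone(void)
{
        int delta, idle;

        pthread_mutex_lock(&lock);
        delta = atomic_exchange(&puts_pending, 0);
        refcount -= delta;
        idle = (refcount == 0);
        pthread_mutex_unlock(&lock);
        return idle;
}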
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 446fbeefad1c..e56923026dd8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1562,8 +1562,10 @@ static void perf_remove_from_context(struct perf_event *event, bool detach_group
 
         if (!task) {
                 /*
-                 * Per cpu events are removed via an smp call and
-                 * the removal is always successful.
+                 * Per cpu events are removed via an smp call. The removal can
+                 * fail if the CPU is currently offline, but in that case we
+                 * already called __perf_remove_from_context from
+                 * perf_event_exit_cpu.
                  */
                 cpu_function_call(event->cpu, __perf_remove_from_context, &re);
                 return;
@@ -6071,11 +6073,6 @@ static int perf_swevent_init(struct perf_event *event)
         return 0;
 }
 
-static int perf_swevent_event_idx(struct perf_event *event)
-{
-        return 0;
-}
-
 static struct pmu perf_swevent = {
         .task_ctx_nr    = perf_sw_context,
 
@@ -6085,8 +6082,6 @@ static struct pmu perf_swevent = {
         .start          = perf_swevent_start,
         .stop           = perf_swevent_stop,
         .read           = perf_swevent_read,
-
-        .event_idx      = perf_swevent_event_idx,
 };
 
 #ifdef CONFIG_EVENT_TRACING
@@ -6204,8 +6199,6 @@ static struct pmu perf_tracepoint = {
         .start          = perf_swevent_start,
         .stop           = perf_swevent_stop,
         .read           = perf_swevent_read,
-
-        .event_idx      = perf_swevent_event_idx,
 };
 
 static inline void perf_tp_register(void)
@@ -6431,8 +6424,6 @@ static struct pmu perf_cpu_clock = {
         .start          = cpu_clock_event_start,
         .stop           = cpu_clock_event_stop,
         .read           = cpu_clock_event_read,
-
-        .event_idx      = perf_swevent_event_idx,
 };
 
 /*
@@ -6511,8 +6502,6 @@ static struct pmu perf_task_clock = {
         .start          = task_clock_event_start,
         .stop           = task_clock_event_stop,
         .read           = task_clock_event_read,
-
-        .event_idx      = perf_swevent_event_idx,
 };
 
 static void perf_pmu_nop_void(struct pmu *pmu)
@@ -6542,7 +6531,7 @@ static void perf_pmu_cancel_txn(struct pmu *pmu)
 
 static int perf_event_idx_default(struct perf_event *event)
 {
-        return event->hw.idx + 1;
+        return 0;
 }
 
 /*
@@ -8130,7 +8119,7 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)
 
 static void __perf_event_exit_context(void *__info)
 {
-        struct remove_event re = { .detach_group = false };
+        struct remove_event re = { .detach_group = true };
         struct perf_event_context *ctx = __info;
 
         perf_pmu_rotate_stop(ctx->pmu);
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 1559fb0b9296..9803a6600d49 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -605,11 +605,6 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags)
         bp->hw.state = PERF_HES_STOPPED;
 }
 
-static int hw_breakpoint_event_idx(struct perf_event *bp)
-{
-        return 0;
-}
-
 static struct pmu perf_breakpoint = {
         .task_ctx_nr    = perf_sw_context, /* could eventually get its own */
 
@@ -619,8 +614,6 @@ static struct pmu perf_breakpoint = {
         .start          = hw_breakpoint_start,
         .stop           = hw_breakpoint_stop,
         .read           = hw_breakpoint_pmu_read,
-
-        .event_idx      = hw_breakpoint_event_idx,
 };
 
 int __init init_hw_breakpoint(void)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 1d0af8a2c646..ed8f2cde34c5 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1640,7 +1640,6 @@ bool uprobe_deny_signal(void)
                 if (__fatal_signal_pending(t) || arch_uprobe_xol_was_trapped(t)) {
                         utask->state = UTASK_SSTEP_TRAPPED;
                         set_tsk_thread_flag(t, TIF_UPROBE);
-                        set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
                 }
         }
 
diff --git a/kernel/futex.c b/kernel/futex.c
index f3a3a071283c..63678b573d61 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -143,9 +143,8 @@
  *
  * Where (A) orders the waiters increment and the futex value read through
  * atomic operations (see hb_waiters_inc) and where (B) orders the write
- * to futex and the waiters read -- this is done by the barriers in
- * get_futex_key_refs(), through either ihold or atomic_inc, depending on the
- * futex type.
+ * to futex and the waiters read -- this is done by the barriers for both
+ * shared and private futexes in get_futex_key_refs().
  *
  * This yields the following case (where X:=waiters, Y:=futex):
  *
@@ -344,13 +343,20 @@ static void get_futex_key_refs(union futex_key *key)
                 futex_get_mm(key); /* implies MB (B) */
                 break;
         default:
+                /*
+                 * Private futexes do not hold reference on an inode or
+                 * mm, therefore the only purpose of calling get_futex_key_refs
+                 * is because we need the barrier for the lockless waiter check.
+                 */
                 smp_mb(); /* explicit MB (B) */
         }
 }
 
 /*
  * Drop a reference to the resource addressed by a key.
- * The hash bucket spinlock must not be held.
+ * The hash bucket spinlock must not be held. This is
+ * a no-op for private futexes, see comment in the get
+ * counterpart.
  */
 static void drop_futex_key_refs(union futex_key *key)
 {
@@ -641,8 +647,14 @@ static struct futex_pi_state * alloc_pi_state(void)
         return pi_state;
 }
 
+/*
+ * Must be called with the hb lock held.
+ */
 static void free_pi_state(struct futex_pi_state *pi_state)
 {
+        if (!pi_state)
+                return;
+
         if (!atomic_dec_and_test(&pi_state->refcount))
                 return;
 
@@ -1521,15 +1533,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
         }
 
 retry:
-        if (pi_state != NULL) {
-                /*
-                 * We will have to lookup the pi_state again, so free this one
-                 * to keep the accounting correct.
-                 */
-                free_pi_state(pi_state);
-                pi_state = NULL;
-        }
-
         ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, VERIFY_READ);
         if (unlikely(ret != 0))
                 goto out;
@@ -1619,6 +1622,8 @@ retry_private:
                 case 0:
                         break;
                 case -EFAULT:
+                        free_pi_state(pi_state);
+                        pi_state = NULL;
                         double_unlock_hb(hb1, hb2);
                         hb_waiters_dec(hb2);
                         put_futex_key(&key2);
@@ -1634,6 +1639,8 @@ retry_private:
                          * exit to complete.
                          * - The user space value changed.
                          */
+                        free_pi_state(pi_state);
+                        pi_state = NULL;
                         double_unlock_hb(hb1, hb2);
                         hb_waiters_dec(hb2);
                         put_futex_key(&key2);
@@ -1710,6 +1717,7 @@ retry_private:
         }
 
 out_unlock:
+        free_pi_state(pi_state);
         double_unlock_hb(hb1, hb2);
         hb_waiters_dec(hb2);
 
@@ -1727,8 +1735,6 @@ out_put_keys:
 out_put_key1:
         put_futex_key(&key1);
 out:
-        if (pi_state != NULL)
-                free_pi_state(pi_state);
         return ret ? ret : task_count;
 }
 
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index cf66c5c8458e..3b7408759bdf 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -35,7 +35,7 @@ config GCOV_KERNEL
 config GCOV_PROFILE_ALL
         bool "Profile entire Kernel"
         depends on GCOV_KERNEL
-        depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM
+        depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM || ARM64
         default n
         ---help---
         This options activates profiling for the entire kernel.
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 8637e041a247..80f7a6d00519 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -196,12 +196,34 @@ int __request_module(bool wait, const char *fmt, ...)
 EXPORT_SYMBOL(__request_module);
 #endif /* CONFIG_MODULES */
 
+static void call_usermodehelper_freeinfo(struct subprocess_info *info)
+{
+        if (info->cleanup)
+                (*info->cleanup)(info);
+        kfree(info);
+}
+
+static void umh_complete(struct subprocess_info *sub_info)
+{
+        struct completion *comp = xchg(&sub_info->complete, NULL);
+        /*
+         * See call_usermodehelper_exec(). If xchg() returns NULL
+         * we own sub_info, the UMH_KILLABLE caller has gone away
+         * or the caller used UMH_NO_WAIT.
+         */
+        if (comp)
+                complete(comp);
+        else
+                call_usermodehelper_freeinfo(sub_info);
+}
+
 /*
  * This is the task which runs the usermode application
  */
 static int ____call_usermodehelper(void *data)
 {
         struct subprocess_info *sub_info = data;
+        int wait = sub_info->wait & ~UMH_KILLABLE;
         struct cred *new;
         int retval;
 
@@ -221,7 +243,7 @@ static int ____call_usermodehelper(void *data)
         retval = -ENOMEM;
         new = prepare_kernel_cred(current);
         if (!new)
-                goto fail;
+                goto out;
 
         spin_lock(&umh_sysctl_lock);
         new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset);
@@ -233,7 +255,7 @@ static int ____call_usermodehelper(void *data)
                 retval = sub_info->init(sub_info, new);
                 if (retval) {
                         abort_creds(new);
-                        goto fail;
+                        goto out;
                 }
         }
 
@@ -242,12 +264,13 @@ static int ____call_usermodehelper(void *data)
         retval = do_execve(getname_kernel(sub_info->path),
                            (const char __user *const __user *)sub_info->argv,
                            (const char __user *const __user *)sub_info->envp);
+out:
+        sub_info->retval = retval;
+        /* wait_for_helper() will call umh_complete if UHM_WAIT_PROC. */
+        if (wait != UMH_WAIT_PROC)
+                umh_complete(sub_info);
         if (!retval)
                 return 0;
-
-        /* Exec failed? */
-fail:
-        sub_info->retval = retval;
         do_exit(0);
 }
 
@@ -258,26 +281,6 @@ static int call_helper(void *data)
         return ____call_usermodehelper(data);
 }
 
-static void call_usermodehelper_freeinfo(struct subprocess_info *info)
-{
-        if (info->cleanup)
-                (*info->cleanup)(info);
-        kfree(info);
-}
-
-static void umh_complete(struct subprocess_info *sub_info)
-{
-        struct completion *comp = xchg(&sub_info->complete, NULL);
-        /*
-         * See call_usermodehelper_exec(). If xchg() returns NULL
-         * we own sub_info, the UMH_KILLABLE caller has gone away.
-         */
-        if (comp)
-                complete(comp);
-        else
-                call_usermodehelper_freeinfo(sub_info);
-}
-
 /* Keventd can't block, but this (a child) can. */
 static int wait_for_helper(void *data)
 {
@@ -336,18 +339,8 @@ static void __call_usermodehelper(struct work_struct *work)
                 kmod_thread_locker = NULL;
         }
 
-        switch (wait) {
-        case UMH_NO_WAIT:
-                call_usermodehelper_freeinfo(sub_info);
-                break;
-
-        case UMH_WAIT_PROC:
-                if (pid > 0)
-                        break;
-                /* FALLTHROUGH */
-        case UMH_WAIT_EXEC:
-                if (pid < 0)
-                        sub_info->retval = pid;
+        if (pid < 0) {
+                sub_info->retval = pid;
                 umh_complete(sub_info);
         }
 }
@@ -588,7 +581,12 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
                 goto out;
         }
 
-        sub_info->complete = &done;
+        /*
+         * Set the completion pointer only if there is a waiter.
+         * This makes it possible to use umh_complete to free
+         * the data structure in case of UMH_NO_WAIT.
+         */
+        sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
         sub_info->wait = wait;
 
         queue_work(khelper_wq, &sub_info->work);
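The kmod.c rework above hinges on one idea: the helper and the waiter both xchg() sub_info->complete to NULL, and whichever side reads back NULL knows the other side has already passed its hand-off point; when nobody is waiting (UMH_NO_WAIT, or a killed UMH_KILLABLE caller), the helper frees the structure itself. A compilable user-space sketch of that ownership hand-off, with invented names and C11 atomics standing in for xchg()/complete():

#include <stdatomic.h>
#include <stdlib.h>

struct waiter { int done; };           /* stand-in for a struct completion */

struct request {
        _Atomic(struct waiter *) waiter; /* NULL means nobody will wait */
        int result;
};

static void wake_up_waiter(struct waiter *w) { w->done = 1; }

/* Called by the worker when it has finished. */
void complete_or_free(struct request *req)
{
        /* Claim the pointer; only one side ever sees the non-NULL value. */
        struct waiter *w = atomic_exchange(&req->waiter, NULL);

        if (w)
                wake_up_waiter(w);   /* a waiter exists and will free req */
        else
                free(req);           /* nobody waits any more: we own req */
}

/* Called by a waiter that gives up (e.g. it was interrupted). */
void abandon_wait(struct request *req)
{
        if (atomic_exchange(&req->waiter, NULL) == NULL)
                free(req);   /* worker already finished; req is ours to free */
        /* else: worker still running; it will read NULL and free req itself */
}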
diff --git a/kernel/panic.c b/kernel/panic.c
index d09dc5c32c67..cf80672b7924 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -244,6 +244,7 @@ static const struct tnt tnts[] = {
  *  'I' - Working around severe firmware bug.
  *  'O' - Out-of-tree module has been loaded.
  *  'E' - Unsigned module has been loaded.
+ *  'L' - A soft lockup has previously occurred.
  *
  * The string is overwritten by the next call to print_tainted().
  */
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index a9dfa79b6bab..1f35a3478f3c 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -502,8 +502,14 @@ int hibernation_restore(int platform_mode)
         error = dpm_suspend_start(PMSG_QUIESCE);
         if (!error) {
                 error = resume_target_kernel(platform_mode);
-                dpm_resume_end(PMSG_RECOVER);
+                /*
+                 * The above should either succeed and jump to the new kernel,
+                 * or return with an error. Otherwise things are just
+                 * undefined, so let's be paranoid.
+                 */
+                BUG_ON(!error);
         }
+        dpm_resume_end(PMSG_RECOVER);
         pm_restore_gfp_mask();
         resume_console();
         pm_restore_console();
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4ca9a33ff620..c347e3ce3a55 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -146,7 +146,7 @@ static int platform_suspend_prepare(suspend_state_t state)
 
 static int platform_suspend_prepare_late(suspend_state_t state)
 {
-        return state == PM_SUSPEND_FREEZE && freeze_ops->prepare ?
+        return state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->prepare ?
                 freeze_ops->prepare() : 0;
 }
 
@@ -164,7 +164,7 @@ static void platform_resume_noirq(suspend_state_t state)
 
 static void platform_resume_early(suspend_state_t state)
 {
-        if (state == PM_SUSPEND_FREEZE && freeze_ops->restore)
+        if (state == PM_SUSPEND_FREEZE && freeze_ops && freeze_ops->restore)
                 freeze_ops->restore();
 }
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 133e47223095..9815447d22e0 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3299,11 +3299,16 @@ static void _rcu_barrier(struct rcu_state *rsp)
                         continue;
                 rdp = per_cpu_ptr(rsp->rda, cpu);
                 if (rcu_is_nocb_cpu(cpu)) {
-                        _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
-                                           rsp->n_barrier_done);
-                        atomic_inc(&rsp->barrier_cpu_count);
-                        __call_rcu(&rdp->barrier_head, rcu_barrier_callback,
-                                   rsp, cpu, 0);
+                        if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) {
+                                _rcu_barrier_trace(rsp, "OfflineNoCB", cpu,
+                                                   rsp->n_barrier_done);
+                        } else {
+                                _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
+                                                   rsp->n_barrier_done);
+                                atomic_inc(&rsp->barrier_cpu_count);
+                                __call_rcu(&rdp->barrier_head,
+                                           rcu_barrier_callback, rsp, cpu, 0);
+                        }
                 } else if (ACCESS_ONCE(rdp->qlen)) {
                         _rcu_barrier_trace(rsp, "OnlineQ", cpu,
                                            rsp->n_barrier_done);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index d03764652d91..bbdc45d8d74f 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -587,6 +587,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
 static void print_cpu_stall_info_end(void);
 static void zero_cpu_stall_ticks(struct rcu_data *rdp);
 static void increment_cpu_stall_ticks(void);
+static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
 static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 387dd4599344..c1d7f27bd38f 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2050,6 +2050,33 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
 }
 
 /*
+ * Does the specified CPU need an RCU callback for the specified flavor
+ * of rcu_barrier()?
+ */
+static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
+{
+        struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
+        struct rcu_head *rhp;
+
+        /* No-CBs CPUs might have callbacks on any of three lists. */
+        rhp = ACCESS_ONCE(rdp->nocb_head);
+        if (!rhp)
+                rhp = ACCESS_ONCE(rdp->nocb_gp_head);
+        if (!rhp)
+                rhp = ACCESS_ONCE(rdp->nocb_follower_head);
+
+        /* Having no rcuo kthread but CBs after scheduler starts is bad! */
+        if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) {
+                /* RCU callback enqueued before CPU first came online??? */
+                pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
+                       cpu, rhp->func);
+                WARN_ON_ONCE(1);
+        }
+
+        return !!rhp;
+}
+
+/*
  * Enqueue the specified string of rcu_head structures onto the specified
  * CPU's no-CBs lists.  The CPU is specified by rdp, the head of the
  * string by rhp, and the tail of the string by rhtp.  The non-lazy/lazy
@@ -2642,6 +2669,12 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
 
 #else /* #ifdef CONFIG_RCU_NOCB_CPU */
 
+static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
+{
+        WARN_ON_ONCE(1); /* Should be dead code. */
+        return false;
+}
+
 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
 {
 }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 44999505e1bf..24beb9bb4c3e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2475,44 +2475,6 @@ EXPORT_PER_CPU_SYMBOL(kstat);
 EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
 
 /*
- * Return any ns on the sched_clock that have not yet been accounted in
- * @p in case that task is currently running.
- *
- * Called with task_rq_lock() held on @rq.
- */
-static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
-{
-        u64 ns = 0;
-
-        /*
-         * Must be ->curr _and_ ->on_rq. If dequeued, we would
-         * project cycles that may never be accounted to this
-         * thread, breaking clock_gettime().
-         */
-        if (task_current(rq, p) && task_on_rq_queued(p)) {
-                update_rq_clock(rq);
-                ns = rq_clock_task(rq) - p->se.exec_start;
-                if ((s64)ns < 0)
-                        ns = 0;
-        }
-
-        return ns;
-}
-
-unsigned long long task_delta_exec(struct task_struct *p)
-{
-        unsigned long flags;
-        struct rq *rq;
-        u64 ns = 0;
-
-        rq = task_rq_lock(p, &flags);
-        ns = do_task_delta_exec(p, rq);
-        task_rq_unlock(rq, p, &flags);
-
-        return ns;
-}
-
-/*
  * Return accounted runtime for the task.
  * In case the task is currently running, return the runtime plus current's
  * pending runtime that have not been accounted yet.
@@ -2521,7 +2483,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 {
         unsigned long flags;
         struct rq *rq;
-        u64 ns = 0;
+        u64 ns;
 
 #if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
         /*
@@ -2540,7 +2502,16 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 #endif
 
         rq = task_rq_lock(p, &flags);
-        ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
+        /*
+         * Must be ->curr _and_ ->on_rq. If dequeued, we would
+         * project cycles that may never be accounted to this
+         * thread, breaking clock_gettime().
+         */
+        if (task_current(rq, p) && task_on_rq_queued(p)) {
+                update_rq_clock(rq);
+                p->sched_class->update_curr(rq);
+        }
+        ns = p->se.sum_exec_runtime;
         task_rq_unlock(rq, p, &flags);
 
         return ns;
@@ -2951,6 +2922,47 @@ asmlinkage __visible void __sched notrace preempt_schedule(void)
 }
 NOKPROBE_SYMBOL(preempt_schedule);
 EXPORT_SYMBOL(preempt_schedule);
+
+#ifdef CONFIG_CONTEXT_TRACKING
+/**
+ * preempt_schedule_context - preempt_schedule called by tracing
+ *
+ * The tracing infrastructure uses preempt_enable_notrace to prevent
+ * recursion and tracing preempt enabling caused by the tracing
+ * infrastructure itself. But as tracing can happen in areas coming
+ * from userspace or just about to enter userspace, a preempt enable
+ * can occur before user_exit() is called. This will cause the scheduler
+ * to be called when the system is still in usermode.
+ *
+ * To prevent this, the preempt_enable_notrace will use this function
+ * instead of preempt_schedule() to exit user context if needed before
+ * calling the scheduler.
+ */
+asmlinkage __visible void __sched notrace preempt_schedule_context(void)
+{
+        enum ctx_state prev_ctx;
+
+        if (likely(!preemptible()))
+                return;
+
+        do {
+                __preempt_count_add(PREEMPT_ACTIVE);
+                /*
+                 * Needs preempt disabled in case user_exit() is traced
+                 * and the tracer calls preempt_enable_notrace() causing
+                 * an infinite recursion.
+                 */
+                prev_ctx = exception_enter();
+                __schedule();
+                exception_exit(prev_ctx);
+
+                __preempt_count_sub(PREEMPT_ACTIVE);
+                barrier();
+        } while (need_resched());
+}
+EXPORT_SYMBOL_GPL(preempt_schedule_context);
+#endif /* CONFIG_CONTEXT_TRACKING */
+
 #endif /* CONFIG_PREEMPT */
 
 /*
@@ -6327,6 +6339,10 @@ static void sched_init_numa(void)
                 if (!sched_debug())
                         break;
         }
+
+        if (!level)
+                return;
+
         /*
          * 'level' contains the number of unique distances, excluding the
          * identity distance node_distance(i,i).
@@ -7403,8 +7419,12 @@ void sched_move_task(struct task_struct *tsk)
         if (unlikely(running))
                 put_prev_task(rq, tsk);
 
-        tg = container_of(task_css_check(tsk, cpu_cgrp_id,
-                                lockdep_is_held(&tsk->sighand->siglock)),
+        /*
+         * All callers are synchronized by task_rq_lock(); we do not use RCU
+         * which is pointless here. Thus, we pass "true" to task_css_check()
+         * to prevent lockdep warnings.
+         */
+        tg = container_of(task_css_check(tsk, cpu_cgrp_id, true),
                           struct task_group, css);
         tg = autogroup_task_group(tsk, tg);
         tsk->sched_task_group = tg;
@@ -7833,6 +7853,11 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
         sched_offline_group(tg);
 }
 
+static void cpu_cgroup_fork(struct task_struct *task)
+{
+        sched_move_task(task);
+}
+
 static int cpu_cgroup_can_attach(struct cgroup_subsys_state *css,
                                  struct cgroup_taskset *tset)
 {
@@ -8205,6 +8230,7 @@ struct cgroup_subsys cpu_cgrp_subsys = {
         .css_free       = cpu_cgroup_css_free,
         .css_online     = cpu_cgroup_css_online,
         .css_offline    = cpu_cgroup_css_offline,
+        .fork           = cpu_cgroup_fork,
         .can_attach     = cpu_cgroup_can_attach,
         .attach         = cpu_cgroup_attach,
         .exit           = cpu_cgroup_exit,
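The task_sched_runtime() hunk above replaces the old do_task_delta_exec() arithmetic with a call into the owning scheduling class: take the runqueue lock, ask the class to fold the currently running delta into p->se.sum_exec_runtime through its new update_curr() method, then read the accumulated value. The same "update under the lock, then read the accumulator" shape, reduced to a toy user-space ops table (hypothetical names, not kernel code):

#include <time.h>

struct entity;

struct sched_class_ops {
        /* Fold any not-yet-accounted runtime into the accumulator. */
        void (*update_curr)(struct entity *e);
};

struct entity {
        const struct sched_class_ops *ops;
        long long sum_exec_runtime;   /* nanoseconds accounted so far */
        long long exec_start;         /* when the current slice began */
};

static long long now_ns(void)
{
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
}

static void update_curr_toy(struct entity *e)
{
        long long now = now_ns();

        e->sum_exec_runtime += now - e->exec_start;
        e->exec_start = now;
}

const struct sched_class_ops toy_class = { .update_curr = update_curr_toy };

/* Analogue of task_sched_runtime(): update through the class, then read. */
long long entity_runtime(struct entity *e)
{
        /* the runqueue lock would be taken here */
        e->ops->update_curr(e);
        return e->sum_exec_runtime;
}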
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 256e577faf1b..28fa9d9e9201 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -518,12 +518,20 @@ again:
         }
 
         /*
-         * We need to take care of a possible races here. In fact, the
-         * task might have changed its scheduling policy to something
-         * different from SCHED_DEADLINE or changed its reservation
-         * parameters (through sched_setattr()).
+         * We need to take care of several possible races here:
+         *
+         *   - the task might have changed its scheduling policy
+         *     to something different than SCHED_DEADLINE
+         *   - the task might have changed its reservation parameters
+         *     (through sched_setattr())
+         *   - the task might have been boosted by someone else and
+         *     might be in the boosting/deboosting path
+         *
+         * In all this cases we bail out, as the task is already
+         * in the runqueue or is going to be enqueued back anyway.
          */
-        if (!dl_task(p) || dl_se->dl_new)
+        if (!dl_task(p) || dl_se->dl_new ||
+            dl_se->dl_boosted || !dl_se->dl_throttled)
                 goto unlock;
 
         sched_clock_tick();
@@ -532,7 +540,7 @@ again:
         dl_se->dl_yielded = 0;
         if (task_on_rq_queued(p)) {
                 enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
-                if (task_has_dl_policy(rq->curr))
+                if (dl_task(rq->curr))
                         check_preempt_curr_dl(rq, p, 0);
                 else
                         resched_curr(rq);
@@ -847,8 +855,19 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
          * smaller than our one... OTW we keep our runtime and
          * deadline.
          */
-        if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio))
+        if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
                 pi_se = &pi_task->dl;
+        } else if (!dl_prio(p->normal_prio)) {
+                /*
+                 * Special case in which we have a !SCHED_DEADLINE task
+                 * that is going to be deboosted, but exceedes its
+                 * runtime while doing so. No point in replenishing
+                 * it, as it's going to return back to its original
+                 * scheduling class after this.
+                 */
+                BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
+                return;
+        }
 
         /*
          * If p is throttled, we do nothing. In fact, if it exhausted
@@ -1607,8 +1626,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
                 /* Only reschedule if pushing failed */
                 check_resched = 0;
 #endif /* CONFIG_SMP */
-                if (check_resched && task_has_dl_policy(rq->curr))
-                        check_preempt_curr_dl(rq, p, 0);
+                if (check_resched) {
+                        if (dl_task(rq->curr))
+                                check_preempt_curr_dl(rq, p, 0);
+                        else
+                                resched_curr(rq);
+                }
         }
 }
 
@@ -1678,4 +1701,6 @@ const struct sched_class dl_sched_class = {
         .prio_changed           = prio_changed_dl,
         .switched_from          = switched_from_dl,
         .switched_to            = switched_to_dl,
+
+        .update_curr            = update_curr_dl,
 };
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0b069bf3e708..ef2b104b254c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -726,6 +726,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
         account_cfs_rq_runtime(cfs_rq, delta_exec);
 }
 
+static void update_curr_fair(struct rq *rq)
+{
+        update_curr(cfs_rq_of(&rq->curr->se));
+}
+
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
@@ -828,11 +833,12 @@ static unsigned int task_nr_scan_windows(struct task_struct *p)
 
 static unsigned int task_scan_min(struct task_struct *p)
 {
+        unsigned int scan_size = ACCESS_ONCE(sysctl_numa_balancing_scan_size);
         unsigned int scan, floor;
         unsigned int windows = 1;
 
-        if (sysctl_numa_balancing_scan_size < MAX_SCAN_WINDOW)
-                windows = MAX_SCAN_WINDOW / sysctl_numa_balancing_scan_size;
+        if (scan_size < MAX_SCAN_WINDOW)
+                windows = MAX_SCAN_WINDOW / scan_size;
         floor = 1000 / windows;
 
         scan = sysctl_numa_balancing_scan_period_min / task_nr_scan_windows(p);
@@ -1164,9 +1170,26 @@ static void task_numa_compare(struct task_numa_env *env,
         long moveimp = imp;
 
         rcu_read_lock();
-        cur = ACCESS_ONCE(dst_rq->curr);
-        if (cur->pid == 0) /* idle */
+
+        raw_spin_lock_irq(&dst_rq->lock);
+        cur = dst_rq->curr;
+        /*
+         * No need to move the exiting task, and this ensures that ->curr
+         * wasn't reaped and thus get_task_struct() in task_numa_assign()
+         * is safe under RCU read lock.
+         * Note that rcu_read_lock() itself can't protect from the final
+         * put_task_struct() after the last schedule().
+         */
+        if ((cur->flags & PF_EXITING) || is_idle_task(cur))
                 cur = NULL;
+        raw_spin_unlock_irq(&dst_rq->lock);
+
+        /*
+         * Because we have preemption enabled we can get migrated around and
+         * end try selecting ourselves (current == env->p) as a swap candidate.
+         */
+        if (cur == env->p)
+                goto unlock;
 
         /*
          * "imp" is the fault differential for the source task between the
@@ -1520,7 +1543,7 @@ static void update_task_scan_period(struct task_struct *p,
                  * scanning faster if shared accesses dominate as it may
                  * simply bounce migrations uselessly
                  */
-                ratio = DIV_ROUND_UP(private * NUMA_PERIOD_SLOTS, (private + shared));
+                ratio = DIV_ROUND_UP(private * NUMA_PERIOD_SLOTS, (private + shared + 1));
                 diff = (diff * ratio) / NUMA_PERIOD_SLOTS;
         }
 
@@ -7938,6 +7961,8 @@ const struct sched_class fair_sched_class = {
 
         .get_rr_interval        = get_rr_interval_fair,
 
+        .update_curr            = update_curr_fair,
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
         .task_move_group        = task_move_group_fair,
 #endif
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 67ad4e7f506a..c65dac8c97cd 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -75,6 +75,10 @@ static unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task
         return 0;
 }
 
+static void update_curr_idle(struct rq *rq)
+{
+}
+
 /*
  * Simple, special scheduling class for the per-CPU idle tasks:
  */
@@ -101,4 +105,5 @@ const struct sched_class idle_sched_class = {
 
         .prio_changed           = prio_changed_idle,
         .switched_to            = switched_to_idle,
+        .update_curr            = update_curr_idle,
 };
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d024e6ce30ba..20bca398084a 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2128,6 +2128,8 @@ const struct sched_class rt_sched_class = {
 
         .prio_changed           = prio_changed_rt,
         .switched_to            = switched_to_rt,
+
+        .update_curr            = update_curr_rt,
 };
 
 #ifdef CONFIG_SCHED_DEBUG
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 24156c8434d1..2df8ef067cc5 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1135,6 +1135,8 @@ struct sched_class {
         unsigned int (*get_rr_interval) (struct rq *rq,
                                          struct task_struct *task);
 
+        void (*update_curr) (struct rq *rq);
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
         void (*task_move_group) (struct task_struct *p, int on_rq);
 #endif
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 67426e529f59..79ffec45a6ac 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -102,6 +102,10 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task)
         return 0;
 }
 
+static void update_curr_stop(struct rq *rq)
+{
+}
+
 /*
  * Simple, special scheduling class for the per-CPU stop tasks:
  */
@@ -128,4 +132,5 @@ const struct sched_class stop_sched_class = {
 
         .prio_changed           = prio_changed_stop,
         .switched_to            = switched_to_stop,
+        .update_curr            = update_curr_stop,
 };
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4aada6d9fe74..15f2511a1b7c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -387,7 +387,8 @@ static struct ctl_table kern_table[] = {
                 .data           = &sysctl_numa_balancing_scan_size,
                 .maxlen         = sizeof(unsigned int),
                 .mode           = 0644,
-                .proc_handler   = proc_dointvec,
+                .proc_handler   = proc_dointvec_minmax,
+                .extra1         = &one,
         },
         {
                 .procname       = "numa_balancing",
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 9c94c19f1305..55449909f114 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -72,7 +72,7 @@ static u64 cev_delta2ns(unsigned long latch, struct clock_event_device *evt,
          * Also omit the add if it would overflow the u64 boundary.
          */
         if ((~0ULL - clc > rnd) &&
-            (!ismax || evt->mult <= (1U << evt->shift)))
+            (!ismax || evt->mult <= (1ULL << evt->shift)))
                 clc += rnd;
 
         do_div(clc, evt->mult);
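The one-character clockevents fix matters because of C shift semantics: with a 32-bit constant, `1U << 32` is undefined behaviour (the shift count equals the width of the type) and in practice often yields 1, so the `evt->mult <=` guard can misfire, while widening the constant to `1ULL` keeps the shift well defined up to 63. A tiny compilable illustration, assuming a shift value of 32 (the problematic case the original `1U` form cannot represent):

#include <stdio.h>

int main(void)
{
        unsigned int shift = 32;           /* assumed upper bound for evt->shift */
        unsigned int mult  = 0x80000000u;  /* some large mult value */

        /* 1U << 32 would be undefined behaviour (shift >= width of unsigned int). */
        unsigned long long bound = 1ULL << shift;   /* well defined: 4294967296 */

        printf("mult <= (1ULL << shift): %d\n", mult <= bound);  /* prints 1 */
        return 0;
}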
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 492b986195d5..a16b67859e2a 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -553,7 +553,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
                 *sample = cputime_to_expires(cputime.utime);
                 break;
         case CPUCLOCK_SCHED:
-                *sample = cputime.sum_exec_runtime + task_delta_exec(p);
+                *sample = cputime.sum_exec_runtime;
                 break;
         }
         return 0;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 42b463ad90f2..31ea01f42e1f 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -636,6 +636,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
                         goto out;
                 }
         } else {
+                memset(&event.sigev_value, 0, sizeof(event.sigev_value));
                 event.sigev_notify = SIGEV_SIGNAL;
                 event.sigev_signo = SIGALRM;
                 event.sigev_value.sival_int = new_timer->it_id;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fb186b9ddf51..31c90fec4158 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1925,8 +1925,16 @@ ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec)
          * when we are adding another op to the rec or removing the
          * current one. Thus, if the op is being added, we can
          * ignore it because it hasn't attached itself to the rec
-         * yet. That means we just need to find the op that has a
-         * trampoline and is not beeing added.
+         * yet.
+         *
+         * If an ops is being modified (hooking to different functions)
+         * then we don't care about the new functions that are being
+         * added, just the old ones (that are probably being removed).
+         *
+         * If we are adding an ops to a function that already is using
+         * a trampoline, it needs to be removed (trampolines are only
+         * for single ops connected), then an ops that is not being
+         * modified also needs to be checked.
          */
         do_for_each_ftrace_op(op, ftrace_ops_list) {
 
@@ -1940,17 +1948,23 @@ ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec)
                 if (op->flags & FTRACE_OPS_FL_ADDING)
                         continue;
 
+
                 /*
-                 * If the ops is not being added and has a trampoline,
-                 * then it must be the one that we want!
+                 * If the ops is being modified and is in the old
+                 * hash, then it is probably being removed from this
+                 * function.
                  */
-                if (hash_contains_ip(ip, op->func_hash))
-                        return op;
-
-                /* If the ops is being modified, it may be in the old hash. */
                 if ((op->flags & FTRACE_OPS_FL_MODIFYING) &&
                     hash_contains_ip(ip, &op->old_hash))
                         return op;
+                /*
+                 * If the ops is not being added or modified, and it's
+                 * in its normal filter hash, then this must be the one
+                 * we want!
+                 */
+                if (!(op->flags & FTRACE_OPS_FL_MODIFYING) &&
+                    hash_contains_ip(ip, op->func_hash))
+                        return op;
 
         } while_for_each_ftrace_op(op);
 
@@ -2293,10 +2307,13 @@ static void ftrace_run_update_code(int command)
         FTRACE_WARN_ON(ret);
 }
 
-static void ftrace_run_modify_code(struct ftrace_ops *ops, int command)
+static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
+                                   struct ftrace_hash *old_hash)
 {
         ops->flags |= FTRACE_OPS_FL_MODIFYING;
+        ops->old_hash.filter_hash = old_hash;
         ftrace_run_update_code(command);
+        ops->old_hash.filter_hash = NULL;
         ops->flags &= ~FTRACE_OPS_FL_MODIFYING;
 }
 
@@ -3340,7 +3357,7 @@ static struct ftrace_ops trace_probe_ops __read_mostly =
 
 static int ftrace_probe_registered;
 
-static void __enable_ftrace_function_probe(void)
+static void __enable_ftrace_function_probe(struct ftrace_hash *old_hash)
 {
         int ret;
         int i;
@@ -3348,7 +3365,8 @@ static void __enable_ftrace_function_probe(void)
         if (ftrace_probe_registered) {
3349 | /* still need to update the function call sites */ | 3366 | /* still need to update the function call sites */ |
3350 | if (ftrace_enabled) | 3367 | if (ftrace_enabled) |
3351 | ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS); | 3368 | ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS, |
3369 | old_hash); | ||
3352 | return; | 3370 | return; |
3353 | } | 3371 | } |
3354 | 3372 | ||
@@ -3477,13 +3495,14 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, | |||
3477 | } while_for_each_ftrace_rec(); | 3495 | } while_for_each_ftrace_rec(); |
3478 | 3496 | ||
3479 | ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); | 3497 | ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); |
3498 | |||
3499 | __enable_ftrace_function_probe(old_hash); | ||
3500 | |||
3480 | if (!ret) | 3501 | if (!ret) |
3481 | free_ftrace_hash_rcu(old_hash); | 3502 | free_ftrace_hash_rcu(old_hash); |
3482 | else | 3503 | else |
3483 | count = ret; | 3504 | count = ret; |
3484 | 3505 | ||
3485 | __enable_ftrace_function_probe(); | ||
3486 | |||
3487 | out_unlock: | 3506 | out_unlock: |
3488 | mutex_unlock(&ftrace_lock); | 3507 | mutex_unlock(&ftrace_lock); |
3489 | out: | 3508 | out: |
@@ -3764,10 +3783,11 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) | |||
3764 | return add_hash_entry(hash, ip); | 3783 | return add_hash_entry(hash, ip); |
3765 | } | 3784 | } |
3766 | 3785 | ||
3767 | static void ftrace_ops_update_code(struct ftrace_ops *ops) | 3786 | static void ftrace_ops_update_code(struct ftrace_ops *ops, |
3787 | struct ftrace_hash *old_hash) | ||
3768 | { | 3788 | { |
3769 | if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) | 3789 | if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) |
3770 | ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS); | 3790 | ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash); |
3771 | } | 3791 | } |
3772 | 3792 | ||
3773 | static int | 3793 | static int |
@@ -3813,7 +3833,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, | |||
3813 | old_hash = *orig_hash; | 3833 | old_hash = *orig_hash; |
3814 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); | 3834 | ret = ftrace_hash_move(ops, enable, orig_hash, hash); |
3815 | if (!ret) { | 3835 | if (!ret) { |
3816 | ftrace_ops_update_code(ops); | 3836 | ftrace_ops_update_code(ops, old_hash); |
3817 | free_ftrace_hash_rcu(old_hash); | 3837 | free_ftrace_hash_rcu(old_hash); |
3818 | } | 3838 | } |
3819 | mutex_unlock(&ftrace_lock); | 3839 | mutex_unlock(&ftrace_lock); |
@@ -4058,7 +4078,7 @@ int ftrace_regex_release(struct inode *inode, struct file *file) | |||
4058 | ret = ftrace_hash_move(iter->ops, filter_hash, | 4078 | ret = ftrace_hash_move(iter->ops, filter_hash, |
4059 | orig_hash, iter->hash); | 4079 | orig_hash, iter->hash); |
4060 | if (!ret) { | 4080 | if (!ret) { |
4061 | ftrace_ops_update_code(iter->ops); | 4081 | ftrace_ops_update_code(iter->ops, old_hash); |
4062 | free_ftrace_hash_rcu(old_hash); | 4082 | free_ftrace_hash_rcu(old_hash); |
4063 | } | 4083 | } |
4064 | mutex_unlock(&ftrace_lock); | 4084 | mutex_unlock(&ftrace_lock); |
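Taken together, the ftrace hunks change how ftrace_find_tramp_ops_curr() picks the ops that currently owns a trampoline: ops still being added are skipped as before, an ops that is mid-modification is matched against the old_hash it is moving away from, and only ops that are not being modified are matched against their live filter hash; threading old_hash through ftrace_run_modify_code() is what gives that comparison something to look at. The toy user-space model below (invented types and flags, not kernel code) just replays that selection order:

#include <stdbool.h>
#include <stdio.h>

#define FL_ADDING	0x1
#define FL_MODIFYING	0x2

struct fake_ops {
	const char	*name;
	unsigned int	flags;
	bool		ip_in_old_hash;	/* would hash_contains_ip(ip, &op->old_hash) hit? */
	bool		ip_in_cur_hash;	/* would hash_contains_ip(ip, op->func_hash) hit?  */
};

static struct fake_ops *find_tramp_ops(struct fake_ops *ops, int n)
{
	for (int i = 0; i < n; i++) {
		struct fake_ops *op = &ops[i];

		if (op->flags & FL_ADDING)	/* not attached to the record yet */
			continue;
		if ((op->flags & FL_MODIFYING) && op->ip_in_old_hash)
			return op;		/* probably being removed from this ip */
		if (!(op->flags & FL_MODIFYING) && op->ip_in_cur_hash)
			return op;		/* settled ops that owns the ip */
	}
	return NULL;
}

int main(void)
{
	struct fake_ops ops[] = {
		{ "being-added",    FL_ADDING,    false, true  },
		{ "being-modified", FL_MODIFYING, true,  false },
		{ "stable",         0,            false, true  },
	};

	printf("picked: %s\n", find_tramp_ops(ops, 3)->name);
	return 0;
}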
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 2d75c94ae87d..a56e07c8d15b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c | |||
@@ -538,16 +538,18 @@ static void rb_wake_up_waiters(struct irq_work *work) | |||
538 | * ring_buffer_wait - wait for input to the ring buffer | 538 | * ring_buffer_wait - wait for input to the ring buffer |
539 | * @buffer: buffer to wait on | 539 | * @buffer: buffer to wait on |
540 | * @cpu: the cpu buffer to wait on | 540 | * @cpu: the cpu buffer to wait on |
541 | * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS | ||
541 | * | 542 | * |
542 | * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon | 543 | * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon |
543 | * as data is added to any of the @buffer's cpu buffers. Otherwise | 544 | * as data is added to any of the @buffer's cpu buffers. Otherwise |
544 | * it will wait for data to be added to a specific cpu buffer. | 545 | * it will wait for data to be added to a specific cpu buffer. |
545 | */ | 546 | */ |
546 | int ring_buffer_wait(struct ring_buffer *buffer, int cpu) | 547 | int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) |
547 | { | 548 | { |
548 | struct ring_buffer_per_cpu *cpu_buffer; | 549 | struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer); |
549 | DEFINE_WAIT(wait); | 550 | DEFINE_WAIT(wait); |
550 | struct rb_irq_work *work; | 551 | struct rb_irq_work *work; |
552 | int ret = 0; | ||
551 | 553 | ||
552 | /* | 554 | /* |
553 | * Depending on what the caller is waiting for, either any | 555 | * Depending on what the caller is waiting for, either any |
@@ -564,36 +566,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu) | |||
564 | } | 566 | } |
565 | 567 | ||
566 | 568 | ||
567 | prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); | 569 | while (true) { |
570 | prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); | ||
568 | 571 | ||
569 | /* | 572 | /* |
570 | * The events can happen in critical sections where | 573 | * The events can happen in critical sections where |
571 | * checking a work queue can cause deadlocks. | 574 | * checking a work queue can cause deadlocks. |
572 | * After adding a task to the queue, this flag is set | 575 | * After adding a task to the queue, this flag is set |
573 | * only to notify events to try to wake up the queue | 576 | * only to notify events to try to wake up the queue |
574 | * using irq_work. | 577 | * using irq_work. |
575 | * | 578 | * |
576 | * We don't clear it even if the buffer is no longer | 579 | * We don't clear it even if the buffer is no longer |
577 | * empty. The flag only causes the next event to run | 580 | * empty. The flag only causes the next event to run |
578 | * irq_work to do the work queue wake up. The worse | 581 | * irq_work to do the work queue wake up. The worse |
579 | * that can happen if we race with !trace_empty() is that | 582 | * that can happen if we race with !trace_empty() is that |
580 | * an event will cause an irq_work to try to wake up | 583 | * an event will cause an irq_work to try to wake up |
581 | * an empty queue. | 584 | * an empty queue. |
582 | * | 585 | * |
583 | * There's no reason to protect this flag either, as | 586 | * There's no reason to protect this flag either, as |
584 | * the work queue and irq_work logic will do the necessary | 587 | * the work queue and irq_work logic will do the necessary |
585 | * synchronization for the wake ups. The only thing | 588 | * synchronization for the wake ups. The only thing |
586 | * that is necessary is that the wake up happens after | 589 | * that is necessary is that the wake up happens after |
587 | * a task has been queued. It's OK for spurious wake ups. | 590 | * a task has been queued. It's OK for spurious wake ups. |
588 | */ | 591 | */ |
589 | work->waiters_pending = true; | 592 | work->waiters_pending = true; |
593 | |||
594 | if (signal_pending(current)) { | ||
595 | ret = -EINTR; | ||
596 | break; | ||
597 | } | ||
598 | |||
599 | if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) | ||
600 | break; | ||
601 | |||
602 | if (cpu != RING_BUFFER_ALL_CPUS && | ||
603 | !ring_buffer_empty_cpu(buffer, cpu)) { | ||
604 | unsigned long flags; | ||
605 | bool pagebusy; | ||
606 | |||
607 | if (!full) | ||
608 | break; | ||
609 | |||
610 | raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); | ||
611 | pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; | ||
612 | raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); | ||
613 | |||
614 | if (!pagebusy) | ||
615 | break; | ||
616 | } | ||
590 | 617 | ||
591 | if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) || | ||
592 | (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu))) | ||
593 | schedule(); | 618 | schedule(); |
619 | } | ||
594 | 620 | ||
595 | finish_wait(&work->waiters, &wait); | 621 | finish_wait(&work->waiters, &wait); |
596 | return 0; | 622 | |
623 | return ret; | ||
597 | } | 624 | } |
598 | 625 | ||
599 | /** | 626 | /** |
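The rewrite above turns ring_buffer_wait() into a loop that re-registers on the waitqueue each iteration, does its own signal_pending() check (returning -EINTR so callers no longer need to), and, when a specific CPU is watched with full set, only stops waiting once the reader page is no longer the page being written to. As a rough user-space analogue of the two wake-up conditions the new full flag selects between, here is a pthread sketch; all names are invented, and condition variables stand in for the waitqueue/irq_work machinery, which works quite differently:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

struct fake_buf {
	pthread_mutex_t	lock;
	pthread_cond_t	cond;
	int		bytes;		/* any buffered data at all */
	bool		page_done;	/* writer finished a whole page */
};

static struct fake_buf buf = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
	.cond = PTHREAD_COND_INITIALIZER,
};

/* Wait until there is data (full == false) or a complete page (full == true). */
static void buf_wait(bool full)
{
	pthread_mutex_lock(&buf.lock);
	while (!(full ? buf.page_done : buf.bytes > 0))
		pthread_cond_wait(&buf.cond, &buf.lock);
	pthread_mutex_unlock(&buf.lock);
}

static void *writer(void *arg)
{
	(void)arg;
	usleep(100 * 1000);
	pthread_mutex_lock(&buf.lock);
	buf.bytes = 64;			/* partial data: enough for full == false */
	pthread_cond_broadcast(&buf.cond);
	pthread_mutex_unlock(&buf.lock);

	usleep(100 * 1000);
	pthread_mutex_lock(&buf.lock);
	buf.page_done = true;		/* whole page: enough for full == true */
	pthread_cond_broadcast(&buf.cond);
	pthread_mutex_unlock(&buf.lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, writer, NULL);
	buf_wait(false);
	printf("read-style waiter woke on partial data\n");
	buf_wait(true);
	printf("splice-style waiter woke on a full page\n");
	pthread_join(t, NULL);
	return 0;
}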
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 459a7b1251e5..426962b04183 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -1076,13 +1076,14 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) | |||
1076 | } | 1076 | } |
1077 | #endif /* CONFIG_TRACER_MAX_TRACE */ | 1077 | #endif /* CONFIG_TRACER_MAX_TRACE */ |
1078 | 1078 | ||
1079 | static int wait_on_pipe(struct trace_iterator *iter) | 1079 | static int wait_on_pipe(struct trace_iterator *iter, bool full) |
1080 | { | 1080 | { |
1081 | /* Iterators are static, they should be filled or empty */ | 1081 | /* Iterators are static, they should be filled or empty */ |
1082 | if (trace_buffer_iter(iter, iter->cpu_file)) | 1082 | if (trace_buffer_iter(iter, iter->cpu_file)) |
1083 | return 0; | 1083 | return 0; |
1084 | 1084 | ||
1085 | return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); | 1085 | return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file, |
1086 | full); | ||
1086 | } | 1087 | } |
1087 | 1088 | ||
1088 | #ifdef CONFIG_FTRACE_STARTUP_TEST | 1089 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
@@ -4434,15 +4435,12 @@ static int tracing_wait_pipe(struct file *filp) | |||
4434 | 4435 | ||
4435 | mutex_unlock(&iter->mutex); | 4436 | mutex_unlock(&iter->mutex); |
4436 | 4437 | ||
4437 | ret = wait_on_pipe(iter); | 4438 | ret = wait_on_pipe(iter, false); |
4438 | 4439 | ||
4439 | mutex_lock(&iter->mutex); | 4440 | mutex_lock(&iter->mutex); |
4440 | 4441 | ||
4441 | if (ret) | 4442 | if (ret) |
4442 | return ret; | 4443 | return ret; |
4443 | |||
4444 | if (signal_pending(current)) | ||
4445 | return -EINTR; | ||
4446 | } | 4444 | } |
4447 | 4445 | ||
4448 | return 1; | 4446 | return 1; |
@@ -5372,16 +5370,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
5372 | goto out_unlock; | 5370 | goto out_unlock; |
5373 | } | 5371 | } |
5374 | mutex_unlock(&trace_types_lock); | 5372 | mutex_unlock(&trace_types_lock); |
5375 | ret = wait_on_pipe(iter); | 5373 | ret = wait_on_pipe(iter, false); |
5376 | mutex_lock(&trace_types_lock); | 5374 | mutex_lock(&trace_types_lock); |
5377 | if (ret) { | 5375 | if (ret) { |
5378 | size = ret; | 5376 | size = ret; |
5379 | goto out_unlock; | 5377 | goto out_unlock; |
5380 | } | 5378 | } |
5381 | if (signal_pending(current)) { | ||
5382 | size = -EINTR; | ||
5383 | goto out_unlock; | ||
5384 | } | ||
5385 | goto again; | 5379 | goto again; |
5386 | } | 5380 | } |
5387 | size = 0; | 5381 | size = 0; |
@@ -5500,7 +5494,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
5500 | }; | 5494 | }; |
5501 | struct buffer_ref *ref; | 5495 | struct buffer_ref *ref; |
5502 | int entries, size, i; | 5496 | int entries, size, i; |
5503 | ssize_t ret; | 5497 | ssize_t ret = 0; |
5504 | 5498 | ||
5505 | mutex_lock(&trace_types_lock); | 5499 | mutex_lock(&trace_types_lock); |
5506 | 5500 | ||
@@ -5538,13 +5532,16 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
5538 | int r; | 5532 | int r; |
5539 | 5533 | ||
5540 | ref = kzalloc(sizeof(*ref), GFP_KERNEL); | 5534 | ref = kzalloc(sizeof(*ref), GFP_KERNEL); |
5541 | if (!ref) | 5535 | if (!ref) { |
5536 | ret = -ENOMEM; | ||
5542 | break; | 5537 | break; |
5538 | } | ||
5543 | 5539 | ||
5544 | ref->ref = 1; | 5540 | ref->ref = 1; |
5545 | ref->buffer = iter->trace_buffer->buffer; | 5541 | ref->buffer = iter->trace_buffer->buffer; |
5546 | ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); | 5542 | ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file); |
5547 | if (!ref->page) { | 5543 | if (!ref->page) { |
5544 | ret = -ENOMEM; | ||
5548 | kfree(ref); | 5545 | kfree(ref); |
5549 | break; | 5546 | break; |
5550 | } | 5547 | } |
@@ -5582,19 +5579,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
5582 | 5579 | ||
5583 | /* did we read anything? */ | 5580 | /* did we read anything? */ |
5584 | if (!spd.nr_pages) { | 5581 | if (!spd.nr_pages) { |
5582 | if (ret) | ||
5583 | goto out; | ||
5584 | |||
5585 | if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) { | 5585 | if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK)) { |
5586 | ret = -EAGAIN; | 5586 | ret = -EAGAIN; |
5587 | goto out; | 5587 | goto out; |
5588 | } | 5588 | } |
5589 | mutex_unlock(&trace_types_lock); | 5589 | mutex_unlock(&trace_types_lock); |
5590 | ret = wait_on_pipe(iter); | 5590 | ret = wait_on_pipe(iter, true); |
5591 | mutex_lock(&trace_types_lock); | 5591 | mutex_lock(&trace_types_lock); |
5592 | if (ret) | 5592 | if (ret) |
5593 | goto out; | 5593 | goto out; |
5594 | if (signal_pending(current)) { | 5594 | |
5595 | ret = -EINTR; | ||
5596 | goto out; | ||
5597 | } | ||
5598 | goto again; | 5595 | goto again; |
5599 | } | 5596 | } |
5600 | 5597 | ||
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index 4dc8b79c5f75..29228c4d5696 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -313,7 +313,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id) | |||
313 | int size; | 313 | int size; |
314 | 314 | ||
315 | syscall_nr = trace_get_syscall_nr(current, regs); | 315 | syscall_nr = trace_get_syscall_nr(current, regs); |
316 | if (syscall_nr < 0) | 316 | if (syscall_nr < 0 || syscall_nr >= NR_syscalls) |
317 | return; | 317 | return; |
318 | 318 | ||
319 | /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */ | 319 | /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */ |
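This hunk and the three identical ones below add an upper bound to the existing negative check: syscall_nr is used as an index into bitmaps and metadata arrays sized by NR_syscalls, so a non-negative number at or beyond NR_syscalls (possible on architectures where the traced number does not map 1:1 onto the table) would read past the end of those arrays. A standalone illustration of the guard, with a made-up table size:

#include <stdbool.h>
#include <stdio.h>

#define NR_FAKE_SYSCALLS 400		/* stand-in for NR_syscalls */

static bool enabled[NR_FAKE_SYSCALLS];	/* stand-in for the enabled_*_syscalls bitmaps */

/* Same shape as the checks above: reject negative and too-large numbers
 * before they are ever used as an array index. */
static bool syscall_traced(int nr)
{
	if (nr < 0 || nr >= NR_FAKE_SYSCALLS)
		return false;
	return enabled[nr];
}

int main(void)
{
	enabled[1] = true;
	printf("nr=1    -> %d\n", syscall_traced(1));
	printf("nr=-1   -> %d\n", syscall_traced(-1));
	printf("nr=9999 -> %d (out-of-bounds read without the check)\n",
	       syscall_traced(9999));
	return 0;
}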
@@ -360,7 +360,7 @@ static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret) | |||
360 | int syscall_nr; | 360 | int syscall_nr; |
361 | 361 | ||
362 | syscall_nr = trace_get_syscall_nr(current, regs); | 362 | syscall_nr = trace_get_syscall_nr(current, regs); |
363 | if (syscall_nr < 0) | 363 | if (syscall_nr < 0 || syscall_nr >= NR_syscalls) |
364 | return; | 364 | return; |
365 | 365 | ||
366 | /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */ | 366 | /* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */ |
@@ -567,7 +567,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) | |||
567 | int size; | 567 | int size; |
568 | 568 | ||
569 | syscall_nr = trace_get_syscall_nr(current, regs); | 569 | syscall_nr = trace_get_syscall_nr(current, regs); |
570 | if (syscall_nr < 0) | 570 | if (syscall_nr < 0 || syscall_nr >= NR_syscalls) |
571 | return; | 571 | return; |
572 | if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) | 572 | if (!test_bit(syscall_nr, enabled_perf_enter_syscalls)) |
573 | return; | 573 | return; |
@@ -641,7 +641,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) | |||
641 | int size; | 641 | int size; |
642 | 642 | ||
643 | syscall_nr = trace_get_syscall_nr(current, regs); | 643 | syscall_nr = trace_get_syscall_nr(current, regs); |
644 | if (syscall_nr < 0) | 644 | if (syscall_nr < 0 || syscall_nr >= NR_syscalls) |
645 | return; | 645 | return; |
646 | if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) | 646 | if (!test_bit(syscall_nr, enabled_perf_exit_syscalls)) |
647 | return; | 647 | return; |