aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Jens Axboe <axboe@kernel.dk> 2019-04-22 11:47:36 -0400
committer: Jens Axboe <axboe@kernel.dk> 2019-04-22 11:47:36 -0400
commit: 5c61ee2cd5860e41c8ab98837761ffaa93eb4dfe (patch)
tree: 0c78e25f5020eeee47863092ccbb2a3f56bea8a9 /kernel
parent: cdf3e3deb747d5e193dee617ed37c83060eb576f (diff)
parent: 085b7755808aa11f78ab9377257e1dad2e6fa4bb (diff)
Merge tag 'v5.1-rc6' into for-5.2/block
Pull in v5.1-rc6 to resolve two conflicts. One is in BFQ, in just a comment, and is trivial. The other one is a conflict due to a later fix in the bio multi-page work, and needs a bit more care.

* tag 'v5.1-rc6': (770 commits)
  Linux 5.1-rc6
  block: make sure that bvec length can't be overflow
  block: kill all_q_node in request_queue
  x86/cpu/intel: Lower the "ENERGY_PERF_BIAS: Set to normal" message's log priority
  coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping
  mm/kmemleak.c: fix unused-function warning
  init: initialize jump labels before command line option parsing
  kernel/watchdog_hld.c: hard lockup message should end with a newline
  kcov: improve CONFIG_ARCH_HAS_KCOV help text
  mm: fix inactive list balancing between NUMA nodes and cgroups
  mm/hotplug: treat CMA pages as unmovable
  proc: fixup proc-pid-vm test
  proc: fix map_files test on F29
  mm/vmstat.c: fix /proc/vmstat format for CONFIG_DEBUG_TLBFLUSH=y CONFIG_SMP=n
  mm/memory_hotplug: do not unlock after failing to take the device_hotplug_lock
  mm: swapoff: shmem_unuse() stop eviction without igrab()
  mm: swapoff: take notice of completion sooner
  mm: swapoff: remove too limiting SWAP_UNUSE_MAX_TRIES
  mm: swapoff: shmem_find_swap_entries() filter out other types
  slab: store tagged freelist for off-slab slabmgmt
  ...

Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/bpf/cpumap.c  13
-rw-r--r--  kernel/bpf/inode.c  32
-rw-r--r--  kernel/bpf/verifier.c  5
-rw-r--r--  kernel/dma/debug.c  2
-rw-r--r--  kernel/events/core.c  89
-rw-r--r--  kernel/events/ring_buffer.c  37
-rw-r--r--  kernel/irq/chip.c  4
-rw-r--r--  kernel/irq/irqdesc.c  1
-rw-r--r--  kernel/kprobes.c  6
-rw-r--r--  kernel/locking/lockdep.c  20
-rw-r--r--  kernel/sched/deadline.c  3
-rw-r--r--  kernel/sched/fair.c  31
-rw-r--r--  kernel/seccomp.c  2
-rw-r--r--  kernel/signal.c  15
-rw-r--r--  kernel/sysctl.c  3
-rw-r--r--  kernel/time/alarmtimer.c  2
-rw-r--r--  kernel/time/sched_clock.c  4
-rw-r--r--  kernel/time/tick-common.c  2
-rw-r--r--  kernel/time/timekeeping.h  7
-rw-r--r--  kernel/trace/ftrace.c  6
-rw-r--r--  kernel/trace/trace.c  6
-rw-r--r--  kernel/trace/trace_syscalls.c  9
-rw-r--r--  kernel/watchdog_hld.c  3
23 files changed, 195 insertions, 107 deletions
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8974b3755670..3c18260403dd 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -162,10 +162,14 @@ static void cpu_map_kthread_stop(struct work_struct *work)
162static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu, 162static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
163 struct xdp_frame *xdpf) 163 struct xdp_frame *xdpf)
164{ 164{
165 unsigned int hard_start_headroom;
165 unsigned int frame_size; 166 unsigned int frame_size;
166 void *pkt_data_start; 167 void *pkt_data_start;
167 struct sk_buff *skb; 168 struct sk_buff *skb;
168 169
170 /* Part of headroom was reserved to xdpf */
171 hard_start_headroom = sizeof(struct xdp_frame) + xdpf->headroom;
172
169 /* build_skb need to place skb_shared_info after SKB end, and 173 /* build_skb need to place skb_shared_info after SKB end, and
170 * also want to know the memory "truesize". Thus, need to 174 * also want to know the memory "truesize". Thus, need to
171 * know the memory frame size backing xdp_buff. 175 * know the memory frame size backing xdp_buff.
@@ -183,15 +187,15 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
183 * is not at a fixed memory location, with mixed length 187 * is not at a fixed memory location, with mixed length
184 * packets, which is bad for cache-line hotness. 188 * packets, which is bad for cache-line hotness.
185 */ 189 */
186 frame_size = SKB_DATA_ALIGN(xdpf->len + xdpf->headroom) + 190 frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) +
187 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); 191 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
188 192
189 pkt_data_start = xdpf->data - xdpf->headroom; 193 pkt_data_start = xdpf->data - hard_start_headroom;
190 skb = build_skb(pkt_data_start, frame_size); 194 skb = build_skb(pkt_data_start, frame_size);
191 if (!skb) 195 if (!skb)
192 return NULL; 196 return NULL;
193 197
194 skb_reserve(skb, xdpf->headroom); 198 skb_reserve(skb, hard_start_headroom);
195 __skb_put(skb, xdpf->len); 199 __skb_put(skb, xdpf->len);
196 if (xdpf->metasize) 200 if (xdpf->metasize)
197 skb_metadata_set(skb, xdpf->metasize); 201 skb_metadata_set(skb, xdpf->metasize);
@@ -205,6 +209,9 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
205 * - RX ring dev queue index (skb_record_rx_queue) 209 * - RX ring dev queue index (skb_record_rx_queue)
206 */ 210 */
207 211
212 /* Allow SKB to reuse area used by xdp_frame */
213 xdp_scrub_frame(xdpf);
214
208 return skb; 215 return skb;
209} 216}
210 217
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 2ada5e21dfa6..4a8f390a2b82 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -554,19 +554,6 @@ struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type typ
554} 554}
555EXPORT_SYMBOL(bpf_prog_get_type_path); 555EXPORT_SYMBOL(bpf_prog_get_type_path);
556 556
557static void bpf_evict_inode(struct inode *inode)
558{
559 enum bpf_type type;
560
561 truncate_inode_pages_final(&inode->i_data);
562 clear_inode(inode);
563
564 if (S_ISLNK(inode->i_mode))
565 kfree(inode->i_link);
566 if (!bpf_inode_type(inode, &type))
567 bpf_any_put(inode->i_private, type);
568}
569
570/* 557/*
571 * Display the mount options in /proc/mounts. 558 * Display the mount options in /proc/mounts.
572 */ 559 */
@@ -579,11 +566,28 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root)
579 return 0; 566 return 0;
580} 567}
581 568
569static void bpf_destroy_inode_deferred(struct rcu_head *head)
570{
571 struct inode *inode = container_of(head, struct inode, i_rcu);
572 enum bpf_type type;
573
574 if (S_ISLNK(inode->i_mode))
575 kfree(inode->i_link);
576 if (!bpf_inode_type(inode, &type))
577 bpf_any_put(inode->i_private, type);
578 free_inode_nonrcu(inode);
579}
580
581static void bpf_destroy_inode(struct inode *inode)
582{
583 call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred);
584}
585
582static const struct super_operations bpf_super_ops = { 586static const struct super_operations bpf_super_ops = {
583 .statfs = simple_statfs, 587 .statfs = simple_statfs,
584 .drop_inode = generic_delete_inode, 588 .drop_inode = generic_delete_inode,
585 .show_options = bpf_show_options, 589 .show_options = bpf_show_options,
586 .evict_inode = bpf_evict_inode, 590 .destroy_inode = bpf_destroy_inode,
587}; 591};
588 592
589enum { 593enum {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fd502c1f71eb..6c5a41f7f338 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1897,8 +1897,9 @@ continue_func:
1897 } 1897 }
1898 frame++; 1898 frame++;
1899 if (frame >= MAX_CALL_FRAMES) { 1899 if (frame >= MAX_CALL_FRAMES) {
1900 WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); 1900 verbose(env, "the call stack of %d frames is too deep !\n",
1901 return -EFAULT; 1901 frame);
1902 return -E2BIG;
1902 } 1903 }
1903 goto process_func; 1904 goto process_func;
1904 } 1905 }
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 45d51e8e26f6..a218e43cc382 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -706,7 +706,7 @@ static struct dma_debug_entry *dma_entry_alloc(void)
706#ifdef CONFIG_STACKTRACE 706#ifdef CONFIG_STACKTRACE
707 entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; 707 entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
708 entry->stacktrace.entries = entry->st_entries; 708 entry->stacktrace.entries = entry->st_entries;
709 entry->stacktrace.skip = 2; 709 entry->stacktrace.skip = 1;
710 save_stack_trace(&entry->stacktrace); 710 save_stack_trace(&entry->stacktrace);
711#endif 711#endif
712 712
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 72d06e302e99..dc7dead2d2cc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2009,8 +2009,8 @@ event_sched_out(struct perf_event *event,
2009 event->pmu->del(event, 0); 2009 event->pmu->del(event, 0);
2010 event->oncpu = -1; 2010 event->oncpu = -1;
2011 2011
2012 if (event->pending_disable) { 2012 if (READ_ONCE(event->pending_disable) >= 0) {
2013 event->pending_disable = 0; 2013 WRITE_ONCE(event->pending_disable, -1);
2014 state = PERF_EVENT_STATE_OFF; 2014 state = PERF_EVENT_STATE_OFF;
2015 } 2015 }
2016 perf_event_set_state(event, state); 2016 perf_event_set_state(event, state);
@@ -2198,7 +2198,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
2198 2198
2199void perf_event_disable_inatomic(struct perf_event *event) 2199void perf_event_disable_inatomic(struct perf_event *event)
2200{ 2200{
2201 event->pending_disable = 1; 2201 WRITE_ONCE(event->pending_disable, smp_processor_id());
2202 /* can fail, see perf_pending_event_disable() */
2202 irq_work_queue(&event->pending); 2203 irq_work_queue(&event->pending);
2203} 2204}
2204 2205
@@ -5810,10 +5811,45 @@ void perf_event_wakeup(struct perf_event *event)
5810 } 5811 }
5811} 5812}
5812 5813
5814static void perf_pending_event_disable(struct perf_event *event)
5815{
5816 int cpu = READ_ONCE(event->pending_disable);
5817
5818 if (cpu < 0)
5819 return;
5820
5821 if (cpu == smp_processor_id()) {
5822 WRITE_ONCE(event->pending_disable, -1);
5823 perf_event_disable_local(event);
5824 return;
5825 }
5826
5827 /*
5828 * CPU-A CPU-B
5829 *
5830 * perf_event_disable_inatomic()
5831 * @pending_disable = CPU-A;
5832 * irq_work_queue();
5833 *
5834 * sched-out
5835 * @pending_disable = -1;
5836 *
5837 * sched-in
5838 * perf_event_disable_inatomic()
5839 * @pending_disable = CPU-B;
5840 * irq_work_queue(); // FAILS
5841 *
5842 * irq_work_run()
5843 * perf_pending_event()
5844 *
5845 * But the event runs on CPU-B and wants disabling there.
5846 */
5847 irq_work_queue_on(&event->pending, cpu);
5848}
5849
5813static void perf_pending_event(struct irq_work *entry) 5850static void perf_pending_event(struct irq_work *entry)
5814{ 5851{
5815 struct perf_event *event = container_of(entry, 5852 struct perf_event *event = container_of(entry, struct perf_event, pending);
5816 struct perf_event, pending);
5817 int rctx; 5853 int rctx;
5818 5854
5819 rctx = perf_swevent_get_recursion_context(); 5855 rctx = perf_swevent_get_recursion_context();
@@ -5822,10 +5858,7 @@ static void perf_pending_event(struct irq_work *entry)
5822 * and we won't recurse 'further'. 5858 * and we won't recurse 'further'.
5823 */ 5859 */
5824 5860
5825 if (event->pending_disable) { 5861 perf_pending_event_disable(event);
5826 event->pending_disable = 0;
5827 perf_event_disable_local(event);
5828 }
5829 5862
5830 if (event->pending_wakeup) { 5863 if (event->pending_wakeup) {
5831 event->pending_wakeup = 0; 5864 event->pending_wakeup = 0;
@@ -9044,26 +9077,29 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
9044 if (task == TASK_TOMBSTONE) 9077 if (task == TASK_TOMBSTONE)
9045 return; 9078 return;
9046 9079
9047 if (!ifh->nr_file_filters) 9080 if (ifh->nr_file_filters) {
9048 return; 9081 mm = get_task_mm(event->ctx->task);
9049 9082 if (!mm)
9050 mm = get_task_mm(event->ctx->task); 9083 goto restart;
9051 if (!mm)
9052 goto restart;
9053 9084
9054 down_read(&mm->mmap_sem); 9085 down_read(&mm->mmap_sem);
9086 }
9055 9087
9056 raw_spin_lock_irqsave(&ifh->lock, flags); 9088 raw_spin_lock_irqsave(&ifh->lock, flags);
9057 list_for_each_entry(filter, &ifh->list, entry) { 9089 list_for_each_entry(filter, &ifh->list, entry) {
9058 event->addr_filter_ranges[count].start = 0; 9090 if (filter->path.dentry) {
9059 event->addr_filter_ranges[count].size = 0; 9091 /*
9092 * Adjust base offset if the filter is associated to a
9093 * binary that needs to be mapped:
9094 */
9095 event->addr_filter_ranges[count].start = 0;
9096 event->addr_filter_ranges[count].size = 0;
9060 9097
9061 /*
9062 * Adjust base offset if the filter is associated to a binary
9063 * that needs to be mapped:
9064 */
9065 if (filter->path.dentry)
9066 perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]); 9098 perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]);
9099 } else {
9100 event->addr_filter_ranges[count].start = filter->offset;
9101 event->addr_filter_ranges[count].size = filter->size;
9102 }
9067 9103
9068 count++; 9104 count++;
9069 } 9105 }
@@ -9071,9 +9107,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event)
9071 event->addr_filters_gen++; 9107 event->addr_filters_gen++;
9072 raw_spin_unlock_irqrestore(&ifh->lock, flags); 9108 raw_spin_unlock_irqrestore(&ifh->lock, flags);
9073 9109
9074 up_read(&mm->mmap_sem); 9110 if (ifh->nr_file_filters) {
9111 up_read(&mm->mmap_sem);
9075 9112
9076 mmput(mm); 9113 mmput(mm);
9114 }
9077 9115
9078restart: 9116restart:
9079 perf_event_stop(event, 1); 9117 perf_event_stop(event, 1);
@@ -10236,6 +10274,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
10236 10274
10237 10275
10238 init_waitqueue_head(&event->waitq); 10276 init_waitqueue_head(&event->waitq);
10277 event->pending_disable = -1;
10239 init_irq_work(&event->pending, perf_pending_event); 10278 init_irq_work(&event->pending, perf_pending_event);
10240 10279
10241 mutex_init(&event->mmap_mutex); 10280 mutex_init(&event->mmap_mutex);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index a4047321d7d8..5eedb49a65ea 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -392,7 +392,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
392 * store that will be enabled on successful return 392 * store that will be enabled on successful return
393 */ 393 */
394 if (!handle->size) { /* A, matches D */ 394 if (!handle->size) { /* A, matches D */
395 event->pending_disable = 1; 395 event->pending_disable = smp_processor_id();
396 perf_output_wakeup(handle); 396 perf_output_wakeup(handle);
397 local_set(&rb->aux_nest, 0); 397 local_set(&rb->aux_nest, 0);
398 goto err_put; 398 goto err_put;
@@ -455,24 +455,21 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
455 rb->aux_head += size; 455 rb->aux_head += size;
456 } 456 }
457 457
458 if (size || handle->aux_flags) { 458 /*
459 /* 459 * Only send RECORD_AUX if we have something useful to communicate
460 * Only send RECORD_AUX if we have something useful to communicate 460 *
461 * 461 * Note: the OVERWRITE records by themselves are not considered
462 * Note: the OVERWRITE records by themselves are not considered 462 * useful, as they don't communicate any *new* information,
463 * useful, as they don't communicate any *new* information, 463 * aside from the short-lived offset, that becomes history at
464 * aside from the short-lived offset, that becomes history at 464 * the next event sched-in and therefore isn't useful.
465 * the next event sched-in and therefore isn't useful. 465 * The userspace that needs to copy out AUX data in overwrite
466 * The userspace that needs to copy out AUX data in overwrite 466 * mode should know to use user_page::aux_head for the actual
467 * mode should know to use user_page::aux_head for the actual 467 * offset. So, from now on we don't output AUX records that
468 * offset. So, from now on we don't output AUX records that 468 * have *only* OVERWRITE flag set.
469 * have *only* OVERWRITE flag set. 469 */
470 */ 470 if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE))
471 471 perf_event_aux_event(handle->event, aux_head, size,
472 if (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE) 472 handle->aux_flags);
473 perf_event_aux_event(handle->event, aux_head, size,
474 handle->aux_flags);
475 }
476 473
477 rb->user_page->aux_head = rb->aux_head; 474 rb->user_page->aux_head = rb->aux_head;
478 if (rb_need_aux_wakeup(rb)) 475 if (rb_need_aux_wakeup(rb))
@@ -480,7 +477,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size)
480 477
481 if (wakeup) { 478 if (wakeup) {
482 if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED) 479 if (handle->aux_flags & PERF_AUX_FLAG_TRUNCATED)
483 handle->event->pending_disable = 1; 480 handle->event->pending_disable = smp_processor_id();
484 perf_output_wakeup(handle); 481 perf_output_wakeup(handle);
485 } 482 }
486 483
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 3faef4a77f71..51128bea3846 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1449,6 +1449,10 @@ int irq_chip_set_vcpu_affinity_parent(struct irq_data *data, void *vcpu_info)
1449int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on) 1449int irq_chip_set_wake_parent(struct irq_data *data, unsigned int on)
1450{ 1450{
1451 data = data->parent_data; 1451 data = data->parent_data;
1452
1453 if (data->chip->flags & IRQCHIP_SKIP_SET_WAKE)
1454 return 0;
1455
1452 if (data->chip->irq_set_wake) 1456 if (data->chip->irq_set_wake)
1453 return data->chip->irq_set_wake(data, on); 1457 return data->chip->irq_set_wake(data, on);
1454 1458
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 13539e12cd80..9f8a709337cf 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -558,6 +558,7 @@ int __init early_irq_init(void)
558 alloc_masks(&desc[i], node); 558 alloc_masks(&desc[i], node);
559 raw_spin_lock_init(&desc[i].lock); 559 raw_spin_lock_init(&desc[i].lock);
560 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); 560 lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
561 mutex_init(&desc[i].request_mutex);
561 desc_set_defaults(i, &desc[i], node, NULL, NULL); 562 desc_set_defaults(i, &desc[i], node, NULL, NULL);
562 } 563 }
563 return arch_early_irq_init(); 564 return arch_early_irq_init();
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c83e54727131..b1ea30a5540e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -709,7 +709,6 @@ static void unoptimize_kprobe(struct kprobe *p, bool force)
709static int reuse_unused_kprobe(struct kprobe *ap) 709static int reuse_unused_kprobe(struct kprobe *ap)
710{ 710{
711 struct optimized_kprobe *op; 711 struct optimized_kprobe *op;
712 int ret;
713 712
714 /* 713 /*
715 * Unused kprobe MUST be on the way of delayed unoptimizing (means 714 * Unused kprobe MUST be on the way of delayed unoptimizing (means
@@ -720,9 +719,8 @@ static int reuse_unused_kprobe(struct kprobe *ap)
720 /* Enable the probe again */ 719 /* Enable the probe again */
721 ap->flags &= ~KPROBE_FLAG_DISABLED; 720 ap->flags &= ~KPROBE_FLAG_DISABLED;
722 /* Optimize it again (remove from op->list) */ 721 /* Optimize it again (remove from op->list) */
723 ret = kprobe_optready(ap); 722 if (!kprobe_optready(ap))
724 if (ret) 723 return -EINVAL;
725 return ret;
726 724
727 optimize_kprobe(ap); 725 optimize_kprobe(ap);
728 return 0; 726 return 0;
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 34cdcbedda49..e221be724fe8 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4689,8 +4689,8 @@ static void free_zapped_rcu(struct rcu_head *ch)
4689 return; 4689 return;
4690 4690
4691 raw_local_irq_save(flags); 4691 raw_local_irq_save(flags);
4692 if (!graph_lock()) 4692 arch_spin_lock(&lockdep_lock);
4693 goto out_irq; 4693 current->lockdep_recursion = 1;
4694 4694
4695 /* closed head */ 4695 /* closed head */
4696 pf = delayed_free.pf + (delayed_free.index ^ 1); 4696 pf = delayed_free.pf + (delayed_free.index ^ 1);
@@ -4702,8 +4702,8 @@ static void free_zapped_rcu(struct rcu_head *ch)
4702 */ 4702 */
4703 call_rcu_zapped(delayed_free.pf + delayed_free.index); 4703 call_rcu_zapped(delayed_free.pf + delayed_free.index);
4704 4704
4705 graph_unlock(); 4705 current->lockdep_recursion = 0;
4706out_irq: 4706 arch_spin_unlock(&lockdep_lock);
4707 raw_local_irq_restore(flags); 4707 raw_local_irq_restore(flags);
4708} 4708}
4709 4709
@@ -4744,21 +4744,17 @@ static void lockdep_free_key_range_reg(void *start, unsigned long size)
4744{ 4744{
4745 struct pending_free *pf; 4745 struct pending_free *pf;
4746 unsigned long flags; 4746 unsigned long flags;
4747 int locked;
4748 4747
4749 init_data_structures_once(); 4748 init_data_structures_once();
4750 4749
4751 raw_local_irq_save(flags); 4750 raw_local_irq_save(flags);
4752 locked = graph_lock(); 4751 arch_spin_lock(&lockdep_lock);
4753 if (!locked) 4752 current->lockdep_recursion = 1;
4754 goto out_irq;
4755
4756 pf = get_pending_free(); 4753 pf = get_pending_free();
4757 __lockdep_free_key_range(pf, start, size); 4754 __lockdep_free_key_range(pf, start, size);
4758 call_rcu_zapped(pf); 4755 call_rcu_zapped(pf);
4759 4756 current->lockdep_recursion = 0;
4760 graph_unlock(); 4757 arch_spin_unlock(&lockdep_lock);
4761out_irq:
4762 raw_local_irq_restore(flags); 4758 raw_local_irq_restore(flags);
4763 4759
4764 /* 4760 /*
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 6a73e41a2016..43901fa3f269 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -252,7 +252,6 @@ static void task_non_contending(struct task_struct *p)
252 if (dl_entity_is_special(dl_se)) 252 if (dl_entity_is_special(dl_se))
253 return; 253 return;
254 254
255 WARN_ON(hrtimer_active(&dl_se->inactive_timer));
256 WARN_ON(dl_se->dl_non_contending); 255 WARN_ON(dl_se->dl_non_contending);
257 256
258 zerolag_time = dl_se->deadline - 257 zerolag_time = dl_se->deadline -
@@ -269,7 +268,7 @@ static void task_non_contending(struct task_struct *p)
269 * If the "0-lag time" already passed, decrease the active 268 * If the "0-lag time" already passed, decrease the active
270 * utilization now, instead of starting a timer 269 * utilization now, instead of starting a timer
271 */ 270 */
272 if (zerolag_time < 0) { 271 if ((zerolag_time < 0) || hrtimer_active(&dl_se->inactive_timer)) {
273 if (dl_task(p)) 272 if (dl_task(p))
274 sub_running_bw(dl_se, dl_rq); 273 sub_running_bw(dl_se, dl_rq);
275 if (!dl_task(p) || p->state == TASK_DEAD) { 274 if (!dl_task(p) || p->state == TASK_DEAD) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fdab7eb6f351..a4d9e14bf138 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4885,6 +4885,8 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
4885 return HRTIMER_NORESTART; 4885 return HRTIMER_NORESTART;
4886} 4886}
4887 4887
4888extern const u64 max_cfs_quota_period;
4889
4888static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) 4890static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
4889{ 4891{
4890 struct cfs_bandwidth *cfs_b = 4892 struct cfs_bandwidth *cfs_b =
@@ -4892,6 +4894,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
4892 unsigned long flags; 4894 unsigned long flags;
4893 int overrun; 4895 int overrun;
4894 int idle = 0; 4896 int idle = 0;
4897 int count = 0;
4895 4898
4896 raw_spin_lock_irqsave(&cfs_b->lock, flags); 4899 raw_spin_lock_irqsave(&cfs_b->lock, flags);
4897 for (;;) { 4900 for (;;) {
@@ -4899,6 +4902,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
4899 if (!overrun) 4902 if (!overrun)
4900 break; 4903 break;
4901 4904
4905 if (++count > 3) {
4906 u64 new, old = ktime_to_ns(cfs_b->period);
4907
4908 new = (old * 147) / 128; /* ~115% */
4909 new = min(new, max_cfs_quota_period);
4910
4911 cfs_b->period = ns_to_ktime(new);
4912
4913 /* since max is 1s, this is limited to 1e9^2, which fits in u64 */
4914 cfs_b->quota *= new;
4915 cfs_b->quota = div64_u64(cfs_b->quota, old);
4916
4917 pr_warn_ratelimited(
4918 "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
4919 smp_processor_id(),
4920 div_u64(new, NSEC_PER_USEC),
4921 div_u64(cfs_b->quota, NSEC_PER_USEC));
4922
4923 /* reset count so we don't come right back in here */
4924 count = 0;
4925 }
4926
4902 idle = do_sched_cfs_period_timer(cfs_b, overrun, flags); 4927 idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
4903 } 4928 }
4904 if (idle) 4929 if (idle)
@@ -7784,10 +7809,10 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
7784 if (cfs_rq->last_h_load_update == now) 7809 if (cfs_rq->last_h_load_update == now)
7785 return; 7810 return;
7786 7811
7787 cfs_rq->h_load_next = NULL; 7812 WRITE_ONCE(cfs_rq->h_load_next, NULL);
7788 for_each_sched_entity(se) { 7813 for_each_sched_entity(se) {
7789 cfs_rq = cfs_rq_of(se); 7814 cfs_rq = cfs_rq_of(se);
7790 cfs_rq->h_load_next = se; 7815 WRITE_ONCE(cfs_rq->h_load_next, se);
7791 if (cfs_rq->last_h_load_update == now) 7816 if (cfs_rq->last_h_load_update == now)
7792 break; 7817 break;
7793 } 7818 }
@@ -7797,7 +7822,7 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
7797 cfs_rq->last_h_load_update = now; 7822 cfs_rq->last_h_load_update = now;
7798 } 7823 }
7799 7824
7800 while ((se = cfs_rq->h_load_next) != NULL) { 7825 while ((se = READ_ONCE(cfs_rq->h_load_next)) != NULL) {
7801 load = cfs_rq->h_load; 7826 load = cfs_rq->h_load;
7802 load = div64_ul(load * se->avg.load_avg, 7827 load = div64_ul(load * se->avg.load_avg,
7803 cfs_rq_load_avg(cfs_rq) + 1); 7828 cfs_rq_load_avg(cfs_rq) + 1);
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 54a0347ca812..df27e499956a 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -149,7 +149,7 @@ static void populate_seccomp_data(struct seccomp_data *sd)
149 149
150 sd->nr = syscall_get_nr(task, regs); 150 sd->nr = syscall_get_nr(task, regs);
151 sd->arch = syscall_get_arch(); 151 sd->arch = syscall_get_arch();
152 syscall_get_arguments(task, regs, 0, 6, args); 152 syscall_get_arguments(task, regs, args);
153 sd->args[0] = args[0]; 153 sd->args[0] = args[0];
154 sd->args[1] = args[1]; 154 sd->args[1] = args[1];
155 sd->args[2] = args[2]; 155 sd->args[2] = args[2];
diff --git a/kernel/signal.c b/kernel/signal.c
index b7953934aa99..227ba170298e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3581,7 +3581,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
3581 if (flags) 3581 if (flags)
3582 return -EINVAL; 3582 return -EINVAL;
3583 3583
3584 f = fdget_raw(pidfd); 3584 f = fdget(pidfd);
3585 if (!f.file) 3585 if (!f.file)
3586 return -EBADF; 3586 return -EBADF;
3587 3587
@@ -3605,16 +3605,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
3605 if (unlikely(sig != kinfo.si_signo)) 3605 if (unlikely(sig != kinfo.si_signo))
3606 goto err; 3606 goto err;
3607 3607
3608 /* Only allow sending arbitrary signals to yourself. */
3609 ret = -EPERM;
3608 if ((task_pid(current) != pid) && 3610 if ((task_pid(current) != pid) &&
3609 (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL)) { 3611 (kinfo.si_code >= 0 || kinfo.si_code == SI_TKILL))
3610 /* Only allow sending arbitrary signals to yourself. */ 3612 goto err;
3611 ret = -EPERM;
3612 if (kinfo.si_code != SI_USER)
3613 goto err;
3614
3615 /* Turn this into a regular kill signal. */
3616 prepare_kill_siginfo(sig, &kinfo);
3617 }
3618 } else { 3613 } else {
3619 prepare_kill_siginfo(sig, &kinfo); 3614 prepare_kill_siginfo(sig, &kinfo);
3620 } 3615 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e5da394d1ca3..c9ec050bcf46 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -128,6 +128,7 @@ static int zero;
128static int __maybe_unused one = 1; 128static int __maybe_unused one = 1;
129static int __maybe_unused two = 2; 129static int __maybe_unused two = 2;
130static int __maybe_unused four = 4; 130static int __maybe_unused four = 4;
131static unsigned long zero_ul;
131static unsigned long one_ul = 1; 132static unsigned long one_ul = 1;
132static unsigned long long_max = LONG_MAX; 133static unsigned long long_max = LONG_MAX;
133static int one_hundred = 100; 134static int one_hundred = 100;
@@ -1750,7 +1751,7 @@ static struct ctl_table fs_table[] = {
1750 .maxlen = sizeof(files_stat.max_files), 1751 .maxlen = sizeof(files_stat.max_files),
1751 .mode = 0644, 1752 .mode = 0644,
1752 .proc_handler = proc_doulongvec_minmax, 1753 .proc_handler = proc_doulongvec_minmax,
1753 .extra1 = &zero, 1754 .extra1 = &zero_ul,
1754 .extra2 = &long_max, 1755 .extra2 = &long_max,
1755 }, 1756 },
1756 { 1757 {
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 2c97e8c2d29f..0519a8805aab 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -594,7 +594,7 @@ static ktime_t alarm_timer_remaining(struct k_itimer *timr, ktime_t now)
594{ 594{
595 struct alarm *alarm = &timr->it.alarm.alarmtimer; 595 struct alarm *alarm = &timr->it.alarm.alarmtimer;
596 596
597 return ktime_sub(now, alarm->node.expires); 597 return ktime_sub(alarm->node.expires, now);
598} 598}
599 599
600/** 600/**
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index 094b82ca95e5..930113b9799a 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -272,7 +272,7 @@ static u64 notrace suspended_sched_clock_read(void)
272 return cd.read_data[seq & 1].epoch_cyc; 272 return cd.read_data[seq & 1].epoch_cyc;
273} 273}
274 274
275static int sched_clock_suspend(void) 275int sched_clock_suspend(void)
276{ 276{
277 struct clock_read_data *rd = &cd.read_data[0]; 277 struct clock_read_data *rd = &cd.read_data[0];
278 278
@@ -283,7 +283,7 @@ static int sched_clock_suspend(void)
283 return 0; 283 return 0;
284} 284}
285 285
286static void sched_clock_resume(void) 286void sched_clock_resume(void)
287{ 287{
288 struct clock_read_data *rd = &cd.read_data[0]; 288 struct clock_read_data *rd = &cd.read_data[0];
289 289
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 529143b4c8d2..df401463a191 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -487,6 +487,7 @@ void tick_freeze(void)
487 trace_suspend_resume(TPS("timekeeping_freeze"), 487 trace_suspend_resume(TPS("timekeeping_freeze"),
488 smp_processor_id(), true); 488 smp_processor_id(), true);
489 system_state = SYSTEM_SUSPEND; 489 system_state = SYSTEM_SUSPEND;
490 sched_clock_suspend();
490 timekeeping_suspend(); 491 timekeeping_suspend();
491 } else { 492 } else {
492 tick_suspend_local(); 493 tick_suspend_local();
@@ -510,6 +511,7 @@ void tick_unfreeze(void)
510 511
511 if (tick_freeze_depth == num_online_cpus()) { 512 if (tick_freeze_depth == num_online_cpus()) {
512 timekeeping_resume(); 513 timekeeping_resume();
514 sched_clock_resume();
513 system_state = SYSTEM_RUNNING; 515 system_state = SYSTEM_RUNNING;
514 trace_suspend_resume(TPS("timekeeping_freeze"), 516 trace_suspend_resume(TPS("timekeeping_freeze"),
515 smp_processor_id(), false); 517 smp_processor_id(), false);
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index 7a9b4eb7a1d5..141ab3ab0354 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -14,6 +14,13 @@ extern u64 timekeeping_max_deferment(void);
14extern void timekeeping_warp_clock(void); 14extern void timekeeping_warp_clock(void);
15extern int timekeeping_suspend(void); 15extern int timekeeping_suspend(void);
16extern void timekeeping_resume(void); 16extern void timekeeping_resume(void);
17#ifdef CONFIG_GENERIC_SCHED_CLOCK
18extern int sched_clock_suspend(void);
19extern void sched_clock_resume(void);
20#else
21static inline int sched_clock_suspend(void) { return 0; }
22static inline void sched_clock_resume(void) { }
23#endif
17 24
18extern void do_timer(unsigned long ticks); 25extern void do_timer(unsigned long ticks);
19extern void update_wall_time(void); 26extern void update_wall_time(void);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 26c8ca9bd06b..b920358dd8f7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -33,6 +33,7 @@
33#include <linux/list.h> 33#include <linux/list.h>
34#include <linux/hash.h> 34#include <linux/hash.h>
35#include <linux/rcupdate.h> 35#include <linux/rcupdate.h>
36#include <linux/kprobes.h>
36 37
37#include <trace/events/sched.h> 38#include <trace/events/sched.h>
38 39
@@ -6246,7 +6247,7 @@ void ftrace_reset_array_ops(struct trace_array *tr)
6246 tr->ops->func = ftrace_stub; 6247 tr->ops->func = ftrace_stub;
6247} 6248}
6248 6249
6249static inline void 6250static nokprobe_inline void
6250__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, 6251__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
6251 struct ftrace_ops *ignored, struct pt_regs *regs) 6252 struct ftrace_ops *ignored, struct pt_regs *regs)
6252{ 6253{
@@ -6306,11 +6307,13 @@ static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
6306{ 6307{
6307 __ftrace_ops_list_func(ip, parent_ip, NULL, regs); 6308 __ftrace_ops_list_func(ip, parent_ip, NULL, regs);
6308} 6309}
6310NOKPROBE_SYMBOL(ftrace_ops_list_func);
6309#else 6311#else
6310static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) 6312static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
6311{ 6313{
6312 __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); 6314 __ftrace_ops_list_func(ip, parent_ip, NULL, NULL);
6313} 6315}
6316NOKPROBE_SYMBOL(ftrace_ops_no_ops);
6314#endif 6317#endif
6315 6318
6316/* 6319/*
@@ -6337,6 +6340,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
6337 preempt_enable_notrace(); 6340 preempt_enable_notrace();
6338 trace_clear_recursion(bit); 6341 trace_clear_recursion(bit);
6339} 6342}
6343NOKPROBE_SYMBOL(ftrace_ops_assist_func);
6340 6344
6341/** 6345/**
6342 * ftrace_ops_get_func - get the function a trampoline should call 6346 * ftrace_ops_get_func - get the function a trampoline should call
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 21153e64bf1c..6c24755655c7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7041,12 +7041,16 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7041 buf->private = 0; 7041 buf->private = 0;
7042} 7042}
7043 7043
7044static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, 7044static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7045 struct pipe_buffer *buf) 7045 struct pipe_buffer *buf)
7046{ 7046{
7047 struct buffer_ref *ref = (struct buffer_ref *)buf->private; 7047 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7048 7048
7049 if (ref->ref > INT_MAX/2)
7050 return false;
7051
7049 ref->ref++; 7052 ref->ref++;
7053 return true;
7050} 7054}
7051 7055
7052/* Pipe buffer operations for a buffer. */ 7056/* Pipe buffer operations for a buffer. */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index f93a56d2db27..fa8fbff736d6 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -314,6 +314,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
314 struct ring_buffer_event *event; 314 struct ring_buffer_event *event;
315 struct ring_buffer *buffer; 315 struct ring_buffer *buffer;
316 unsigned long irq_flags; 316 unsigned long irq_flags;
317 unsigned long args[6];
317 int pc; 318 int pc;
318 int syscall_nr; 319 int syscall_nr;
319 int size; 320 int size;
@@ -347,7 +348,8 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
347 348
348 entry = ring_buffer_event_data(event); 349 entry = ring_buffer_event_data(event);
349 entry->nr = syscall_nr; 350 entry->nr = syscall_nr;
350 syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); 351 syscall_get_arguments(current, regs, args);
352 memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
351 353
352 event_trigger_unlock_commit(trace_file, buffer, event, entry, 354 event_trigger_unlock_commit(trace_file, buffer, event, entry,
353 irq_flags, pc); 355 irq_flags, pc);
@@ -583,6 +585,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
583 struct syscall_metadata *sys_data; 585 struct syscall_metadata *sys_data;
584 struct syscall_trace_enter *rec; 586 struct syscall_trace_enter *rec;
585 struct hlist_head *head; 587 struct hlist_head *head;
588 unsigned long args[6];
586 bool valid_prog_array; 589 bool valid_prog_array;
587 int syscall_nr; 590 int syscall_nr;
588 int rctx; 591 int rctx;
@@ -613,8 +616,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
613 return; 616 return;
614 617
615 rec->nr = syscall_nr; 618 rec->nr = syscall_nr;
616 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 619 syscall_get_arguments(current, regs, args);
617 (unsigned long *)&rec->args); 620 memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
618 621
619 if ((valid_prog_array && 622 if ((valid_prog_array &&
620 !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) || 623 !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) ||
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 71381168dede..247bf0b1582c 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -135,7 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event,
135 if (__this_cpu_read(hard_watchdog_warn) == true) 135 if (__this_cpu_read(hard_watchdog_warn) == true)
136 return; 136 return;
137 137
138 pr_emerg("Watchdog detected hard LOCKUP on cpu %d", this_cpu); 138 pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
139 this_cpu);
139 print_modules(); 140 print_modules();
140 print_irqtrace_events(current); 141 print_irqtrace_events(current);
141 if (regs) 142 if (regs)