Diffstat (limited to 'kernel')
-rw-r--r--  kernel/events/core.c                |  79
-rw-r--r--  kernel/futex.c                      |  23
-rw-r--r--  kernel/irq/msi.c                    |  11
-rw-r--r--  kernel/jump_label.c                 |  61
-rw-r--r--  kernel/livepatch/core.c             |   2
-rw-r--r--  kernel/locking/qspinlock_paravirt.h |   2
-rw-r--r--  kernel/locking/qspinlock_stat.h     |   1
-rw-r--r--  kernel/module.c                     | 121
-rw-r--r--  kernel/power/hibernate.c            |   4
-rw-r--r--  kernel/printk/internal.h            |  16
-rw-r--r--  kernel/printk/nmi.c                 |  13
-rw-r--r--  kernel/printk/printk.c              |  27
-rw-r--r--  kernel/ptrace.c                     |   4
-rw-r--r--  kernel/sched/core.c                 |  19
-rw-r--r--  kernel/sched/cpudeadline.c          |   2
-rw-r--r--  kernel/sched/cputime.c              |  10
-rw-r--r--  kernel/sched/deadline.c             |   5
-rw-r--r--  kernel/sched/fair.c                 |   2
-rw-r--r--  kernel/seccomp.c                    |   6
-rw-r--r--  kernel/time/timer.c                 |   5
-rw-r--r--  kernel/trace/blktrace.c             |   6
21 files changed, 309 insertions, 110 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 356a6c7cb52a..1903b8f3a705 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -448,7 +448,7 @@ static u64 __report_allowed;
448 448
449static void perf_duration_warn(struct irq_work *w) 449static void perf_duration_warn(struct irq_work *w)
450{ 450{
451 printk_ratelimited(KERN_WARNING 451 printk_ratelimited(KERN_INFO
452 "perf: interrupt took too long (%lld > %lld), lowering " 452 "perf: interrupt took too long (%lld > %lld), lowering "
453 "kernel.perf_event_max_sample_rate to %d\n", 453 "kernel.perf_event_max_sample_rate to %d\n",
454 __report_avg, __report_allowed, 454 __report_avg, __report_allowed,
@@ -843,6 +843,32 @@ perf_cgroup_mark_enabled(struct perf_event *event,
843 } 843 }
844 } 844 }
845} 845}
846
847/*
848 * Update cpuctx->cgrp so that it is set when first cgroup event is added and
849 * cleared when last cgroup event is removed.
850 */
851static inline void
852list_update_cgroup_event(struct perf_event *event,
853 struct perf_event_context *ctx, bool add)
854{
855 struct perf_cpu_context *cpuctx;
856
857 if (!is_cgroup_event(event))
858 return;
859
860 if (add && ctx->nr_cgroups++)
861 return;
862 else if (!add && --ctx->nr_cgroups)
863 return;
864 /*
865 * Because cgroup events are always per-cpu events,
866 * this will always be called from the right CPU.
867 */
868 cpuctx = __get_cpu_context(ctx);
869 cpuctx->cgrp = add ? event->cgrp : NULL;
870}
871
846#else /* !CONFIG_CGROUP_PERF */ 872#else /* !CONFIG_CGROUP_PERF */
847 873
848static inline bool 874static inline bool
@@ -920,6 +946,13 @@ perf_cgroup_mark_enabled(struct perf_event *event,
920 struct perf_event_context *ctx) 946 struct perf_event_context *ctx)
921{ 947{
922} 948}
949
950static inline void
951list_update_cgroup_event(struct perf_event *event,
952 struct perf_event_context *ctx, bool add)
953{
954}
955
923#endif 956#endif
924 957
925/* 958/*
@@ -1392,6 +1425,7 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
1392static void 1425static void
1393list_add_event(struct perf_event *event, struct perf_event_context *ctx) 1426list_add_event(struct perf_event *event, struct perf_event_context *ctx)
1394{ 1427{
1428
1395 lockdep_assert_held(&ctx->lock); 1429 lockdep_assert_held(&ctx->lock);
1396 1430
1397 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); 1431 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
@@ -1412,8 +1446,7 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
1412 list_add_tail(&event->group_entry, list); 1446 list_add_tail(&event->group_entry, list);
1413 } 1447 }
1414 1448
1415 if (is_cgroup_event(event)) 1449 list_update_cgroup_event(event, ctx, true);
1416 ctx->nr_cgroups++;
1417 1450
1418 list_add_rcu(&event->event_entry, &ctx->event_list); 1451 list_add_rcu(&event->event_entry, &ctx->event_list);
1419 ctx->nr_events++; 1452 ctx->nr_events++;
@@ -1581,8 +1614,6 @@ static void perf_group_attach(struct perf_event *event)
1581static void 1614static void
1582list_del_event(struct perf_event *event, struct perf_event_context *ctx) 1615list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1583{ 1616{
1584 struct perf_cpu_context *cpuctx;
1585
1586 WARN_ON_ONCE(event->ctx != ctx); 1617 WARN_ON_ONCE(event->ctx != ctx);
1587 lockdep_assert_held(&ctx->lock); 1618 lockdep_assert_held(&ctx->lock);
1588 1619
@@ -1594,20 +1625,7 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1594 1625
1595 event->attach_state &= ~PERF_ATTACH_CONTEXT; 1626 event->attach_state &= ~PERF_ATTACH_CONTEXT;
1596 1627
1597 if (is_cgroup_event(event)) { 1628 list_update_cgroup_event(event, ctx, false);
1598 ctx->nr_cgroups--;
1599 /*
1600 * Because cgroup events are always per-cpu events, this will
1601 * always be called from the right CPU.
1602 */
1603 cpuctx = __get_cpu_context(ctx);
1604 /*
1605 * If there are no more cgroup events then clear cgrp to avoid
1606 * stale pointer in update_cgrp_time_from_cpuctx().
1607 */
1608 if (!ctx->nr_cgroups)
1609 cpuctx->cgrp = NULL;
1610 }
1611 1629
1612 ctx->nr_events--; 1630 ctx->nr_events--;
1613 if (event->attr.inherit_stat) 1631 if (event->attr.inherit_stat)
@@ -1716,8 +1734,8 @@ static inline int pmu_filter_match(struct perf_event *event)
1716static inline int 1734static inline int
1717event_filter_match(struct perf_event *event) 1735event_filter_match(struct perf_event *event)
1718{ 1736{
1719 return (event->cpu == -1 || event->cpu == smp_processor_id()) 1737 return (event->cpu == -1 || event->cpu == smp_processor_id()) &&
1720 && perf_cgroup_match(event) && pmu_filter_match(event); 1738 perf_cgroup_match(event) && pmu_filter_match(event);
1721} 1739}
1722 1740
1723static void 1741static void
@@ -1737,8 +1755,8 @@ event_sched_out(struct perf_event *event,
1737 * maintained, otherwise bogus information is return 1755 * maintained, otherwise bogus information is return
1738 * via read() for time_enabled, time_running: 1756 * via read() for time_enabled, time_running:
1739 */ 1757 */
1740 if (event->state == PERF_EVENT_STATE_INACTIVE 1758 if (event->state == PERF_EVENT_STATE_INACTIVE &&
1741 && !event_filter_match(event)) { 1759 !event_filter_match(event)) {
1742 delta = tstamp - event->tstamp_stopped; 1760 delta = tstamp - event->tstamp_stopped;
1743 event->tstamp_running += delta; 1761 event->tstamp_running += delta;
1744 event->tstamp_stopped = tstamp; 1762 event->tstamp_stopped = tstamp;
@@ -2236,10 +2254,15 @@ perf_install_in_context(struct perf_event_context *ctx,
2236 2254
2237 lockdep_assert_held(&ctx->mutex); 2255 lockdep_assert_held(&ctx->mutex);
2238 2256
2239 event->ctx = ctx;
2240 if (event->cpu != -1) 2257 if (event->cpu != -1)
2241 event->cpu = cpu; 2258 event->cpu = cpu;
2242 2259
2260 /*
2261 * Ensures that if we can observe event->ctx, both the event and ctx
2262 * will be 'complete'. See perf_iterate_sb_cpu().
2263 */
2264 smp_store_release(&event->ctx, ctx);
2265
2243 if (!task) { 2266 if (!task) {
2244 cpu_function_call(cpu, __perf_install_in_context, event); 2267 cpu_function_call(cpu, __perf_install_in_context, event);
2245 return; 2268 return;
@@ -5969,6 +5992,14 @@ static void perf_iterate_sb_cpu(perf_iterate_f output, void *data)
5969 struct perf_event *event; 5992 struct perf_event *event;
5970 5993
5971 list_for_each_entry_rcu(event, &pel->list, sb_list) { 5994 list_for_each_entry_rcu(event, &pel->list, sb_list) {
5995 /*
5996 * Skip events that are not fully formed yet; ensure that
5997 * if we observe event->ctx, both event and ctx will be
5998 * complete enough. See perf_install_in_context().
5999 */
6000 if (!smp_load_acquire(&event->ctx))
6001 continue;
6002
5972 if (event->state < PERF_EVENT_STATE_INACTIVE) 6003 if (event->state < PERF_EVENT_STATE_INACTIVE)
5973 continue; 6004 continue;
5974 if (!event_filter_match(event)) 6005 if (!event_filter_match(event))
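The perf_install_in_context() / perf_iterate_sb_cpu() hunks above publish event->ctx with release semantics and skip events whose ctx is not yet observable, read with acquire semantics. A minimal user-space sketch of that publication pattern, assuming C11 atomics in place of the kernel's smp_store_release()/smp_load_acquire(); the struct and field names here are illustrative, not the perf ones:

#include <stdatomic.h>
#include <stddef.h>

struct ctx   { int nr_events; };
struct event {
	int cpu;                        /* plain field, written before publish */
	_Atomic(struct ctx *) ctx;      /* stays NULL until the event is complete */
};

/* Writer: finish constructing the event, then publish it. */
static void install_event(struct event *e, struct ctx *c)
{
	e->cpu = 0;                     /* ordinary initialization */
	atomic_store_explicit(&e->ctx, c, memory_order_release);
}

/* Reader: an event with a NULL ctx is not fully formed yet, so skip it.
 * Observing a non-NULL ctx guarantees the writer's earlier stores are visible. */
static int iterate_event(struct event *e)
{
	struct ctx *c = atomic_load_explicit(&e->ctx, memory_order_acquire);

	if (!c)
		return -1;              /* skip, exactly like the loop above */
	return c->nr_events + e->cpu;
}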
diff --git a/kernel/futex.c b/kernel/futex.c
index 33664f70e2d2..46cb3a301bc1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -179,7 +179,15 @@ int __read_mostly futex_cmpxchg_enabled;
179 * Futex flags used to encode options to functions and preserve them across 179 * Futex flags used to encode options to functions and preserve them across
180 * restarts. 180 * restarts.
181 */ 181 */
182#define FLAGS_SHARED 0x01 182#ifdef CONFIG_MMU
183# define FLAGS_SHARED 0x01
184#else
185/*
186 * NOMMU does not have per process address space. Let the compiler optimize
187 * code away.
188 */
189# define FLAGS_SHARED 0x00
190#endif
183#define FLAGS_CLOCKRT 0x02 191#define FLAGS_CLOCKRT 0x02
184#define FLAGS_HAS_TIMEOUT 0x04 192#define FLAGS_HAS_TIMEOUT 0x04
185 193
@@ -405,6 +413,16 @@ static void get_futex_key_refs(union futex_key *key)
405 if (!key->both.ptr) 413 if (!key->both.ptr)
406 return; 414 return;
407 415
416 /*
417 * On MMU less systems futexes are always "private" as there is no per
418 * process address space. We need the smp wmb nevertheless - yes,
419 * arch/blackfin has MMU less SMP ...
420 */
421 if (!IS_ENABLED(CONFIG_MMU)) {
422 smp_mb(); /* explicit smp_mb(); (B) */
423 return;
424 }
425
408 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { 426 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
409 case FUT_OFF_INODE: 427 case FUT_OFF_INODE:
410 ihold(key->shared.inode); /* implies smp_mb(); (B) */ 428 ihold(key->shared.inode); /* implies smp_mb(); (B) */
@@ -436,6 +454,9 @@ static void drop_futex_key_refs(union futex_key *key)
436 return; 454 return;
437 } 455 }
438 456
457 if (!IS_ENABLED(CONFIG_MMU))
458 return;
459
439 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { 460 switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
440 case FUT_OFF_INODE: 461 case FUT_OFF_INODE:
441 iput(key->shared.inode); 462 iput(key->shared.inode);
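The futex hunks rely on IS_ENABLED(CONFIG_MMU) being a compile-time constant, so on !MMU builds the shared-futex reference handling becomes dead code the compiler can drop while still being type-checked. A rough sketch of the idiom, assuming a simplified stand-in for the real IS_ENABLED() macro from <linux/kconfig.h>:

#include <stdio.h>

/* Simplified stand-in; pretend the build defines HAVE_MMU to 0 or 1. */
#ifndef HAVE_MMU
#define HAVE_MMU 0                              /* a NOMMU build */
#endif
#define IS_ENABLED(opt) (opt)

#define FLAGS_SHARED (IS_ENABLED(HAVE_MMU) ? 0x01 : 0x00)

static void get_key_refs(int flags)
{
	if (!IS_ENABLED(HAVE_MMU)) {
		/* No per-process address space: every futex is effectively
		 * private, and the code below is eliminated at compile time. */
		return;
	}
	if (flags & FLAGS_SHARED)
		printf("pin the shared mapping\n");
}

int main(void)
{
	get_key_refs(FLAGS_SHARED);
	return 0;
}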
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 54999350162c..19e9dfbe97fa 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -359,6 +359,17 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev,
359 else 359 else
360 dev_dbg(dev, "irq [%d-%d] for MSI\n", 360 dev_dbg(dev, "irq [%d-%d] for MSI\n",
361 virq, virq + desc->nvec_used - 1); 361 virq, virq + desc->nvec_used - 1);
362 /*
363 * This flag is set by the PCI layer as we need to activate
364 * the MSI entries before the PCI layer enables MSI in the
365 * card. Otherwise the card latches a random msi message.
366 */
367 if (info->flags & MSI_FLAG_ACTIVATE_EARLY) {
368 struct irq_data *irq_data;
369
370 irq_data = irq_domain_get_irq_data(domain, desc->irq);
371 irq_domain_activate_irq(irq_data);
372 }
362 } 373 }
363 374
364 return 0; 375 return 0;
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 0dbea887d625..93ad6c1fb9b6 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -14,6 +14,7 @@
14#include <linux/err.h> 14#include <linux/err.h>
15#include <linux/static_key.h> 15#include <linux/static_key.h>
16#include <linux/jump_label_ratelimit.h> 16#include <linux/jump_label_ratelimit.h>
17#include <linux/bug.h>
17 18
18#ifdef HAVE_JUMP_LABEL 19#ifdef HAVE_JUMP_LABEL
19 20
@@ -56,6 +57,49 @@ jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
56 57
57static void jump_label_update(struct static_key *key); 58static void jump_label_update(struct static_key *key);
58 59
60/*
61 * There are similar definitions for the !HAVE_JUMP_LABEL case in jump_label.h.
62 * The use of 'atomic_read()' requires atomic.h and its problematic for some
63 * kernel headers such as kernel.h and others. Since static_key_count() is not
64 * used in the branch statements as it is for the !HAVE_JUMP_LABEL case its ok
65 * to have it be a function here. Similarly, for 'static_key_enable()' and
66 * 'static_key_disable()', which require bug.h. This should allow jump_label.h
67 * to be included from most/all places for HAVE_JUMP_LABEL.
68 */
69int static_key_count(struct static_key *key)
70{
71 /*
72 * -1 means the first static_key_slow_inc() is in progress.
73 * static_key_enabled() must return true, so return 1 here.
74 */
75 int n = atomic_read(&key->enabled);
76
77 return n >= 0 ? n : 1;
78}
79EXPORT_SYMBOL_GPL(static_key_count);
80
81void static_key_enable(struct static_key *key)
82{
83 int count = static_key_count(key);
84
85 WARN_ON_ONCE(count < 0 || count > 1);
86
87 if (!count)
88 static_key_slow_inc(key);
89}
90EXPORT_SYMBOL_GPL(static_key_enable);
91
92void static_key_disable(struct static_key *key)
93{
94 int count = static_key_count(key);
95
96 WARN_ON_ONCE(count < 0 || count > 1);
97
98 if (count)
99 static_key_slow_dec(key);
100}
101EXPORT_SYMBOL_GPL(static_key_disable);
102
59void static_key_slow_inc(struct static_key *key) 103void static_key_slow_inc(struct static_key *key)
60{ 104{
61 int v, v1; 105 int v, v1;
@@ -235,6 +279,18 @@ void __init jump_label_init(void)
235 struct static_key *key = NULL; 279 struct static_key *key = NULL;
236 struct jump_entry *iter; 280 struct jump_entry *iter;
237 281
282 /*
283 * Since we are initializing the static_key.enabled field with
284 * the 'raw' int values (to avoid pulling in atomic.h) in
285 * jump_label.h, let's make sure that is safe. There are only two
286 * cases to check since we initialize to 0 or 1.
287 */
288 BUILD_BUG_ON((int)ATOMIC_INIT(0) != 0);
289 BUILD_BUG_ON((int)ATOMIC_INIT(1) != 1);
290
291 if (static_key_initialized)
292 return;
293
238 jump_label_lock(); 294 jump_label_lock();
239 jump_label_sort_entries(iter_start, iter_stop); 295 jump_label_sort_entries(iter_start, iter_stop);
240 296
@@ -284,11 +340,14 @@ static int __jump_label_mod_text_reserved(void *start, void *end)
284{ 340{
285 struct module *mod; 341 struct module *mod;
286 342
343 preempt_disable();
287 mod = __module_text_address((unsigned long)start); 344 mod = __module_text_address((unsigned long)start);
345 WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod);
346 preempt_enable();
347
288 if (!mod) 348 if (!mod)
289 return 0; 349 return 0;
290 350
291 WARN_ON_ONCE(__module_text_address((unsigned long)end) != mod);
292 351
293 return __jump_label_text_reserved(mod->jump_entries, 352 return __jump_label_text_reserved(mod->jump_entries,
294 mod->jump_entries + mod->num_jump_entries, 353 mod->jump_entries + mod->num_jump_entries,
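The new static_key_enable()/static_key_disable() helpers are idempotent: they only take the slow inc/dec path when the key is currently off or on, respectively, so calling them twice in a row is harmless. A small user-space sketch of that logic, assuming a plain int where the kernel uses atomic_t and omitting the actual code patching:

#include <assert.h>

struct static_key { int enabled; };             /* kernel uses atomic_t */

static int static_key_count(struct static_key *key)
{
	/* -1 means the first slow_inc is still in progress: report enabled. */
	return key->enabled >= 0 ? key->enabled : 1;
}

static void static_key_slow_inc(struct static_key *key) { key->enabled++; }
static void static_key_slow_dec(struct static_key *key) { key->enabled--; }

static void static_key_enable(struct static_key *key)
{
	if (!static_key_count(key))
		static_key_slow_inc(key);       /* 0 -> 1: patch branches in */
}

static void static_key_disable(struct static_key *key)
{
	if (static_key_count(key))
		static_key_slow_dec(key);       /* 1 -> 0: patch branches out */
}

int main(void)
{
	struct static_key key = { 0 };

	static_key_enable(&key);
	static_key_enable(&key);                /* idempotent: count stays 1 */
	assert(static_key_count(&key) == 1);
	static_key_disable(&key);
	assert(static_key_count(&key) == 0);
	return 0;
}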
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 5c2bc1052691..8bbe50704621 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -309,7 +309,7 @@ static int klp_write_object_relocations(struct module *pmod,
309 break; 309 break;
310 } 310 }
311 311
312 module_enable_ro(pmod); 312 module_enable_ro(pmod, true);
313 return ret; 313 return ret;
314} 314}
315 315
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 37649e69056c..8a99abf58080 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -450,7 +450,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
450 goto gotlock; 450 goto gotlock;
451 } 451 }
452 } 452 }
453 WRITE_ONCE(pn->state, vcpu_halted); 453 WRITE_ONCE(pn->state, vcpu_hashed);
454 qstat_inc(qstat_pv_wait_head, true); 454 qstat_inc(qstat_pv_wait_head, true);
455 qstat_inc(qstat_pv_wait_again, waitcnt); 455 qstat_inc(qstat_pv_wait_again, waitcnt);
456 pv_wait(&l->locked, _Q_SLOW_VAL); 456 pv_wait(&l->locked, _Q_SLOW_VAL);
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index 22e025309845..b9d031516254 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -153,7 +153,6 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf,
153 */ 153 */
154 if ((counter == qstat_pv_latency_kick) || 154 if ((counter == qstat_pv_latency_kick) ||
155 (counter == qstat_pv_latency_wake)) { 155 (counter == qstat_pv_latency_wake)) {
156 stat = 0;
157 if (kicks) 156 if (kicks)
158 stat = DIV_ROUND_CLOSEST_ULL(stat, kicks); 157 stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
159 } 158 }
diff --git a/kernel/module.c b/kernel/module.c
index a0f48b8b00da..529efae9f481 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -265,7 +265,7 @@ static void module_assert_mutex_or_preempt(void)
265 if (unlikely(!debug_locks)) 265 if (unlikely(!debug_locks))
266 return; 266 return;
267 267
268 WARN_ON(!rcu_read_lock_sched_held() && 268 WARN_ON_ONCE(!rcu_read_lock_sched_held() &&
269 !lockdep_is_held(&module_mutex)); 269 !lockdep_is_held(&module_mutex));
270#endif 270#endif
271} 271}
@@ -337,7 +337,7 @@ static inline void add_taint_module(struct module *mod, unsigned flag,
337 * A thread that wants to hold a reference to a module only while it 337 * A thread that wants to hold a reference to a module only while it
338 * is running can call this to safely exit. nfsd and lockd use this. 338 * is running can call this to safely exit. nfsd and lockd use this.
339 */ 339 */
340void __module_put_and_exit(struct module *mod, long code) 340void __noreturn __module_put_and_exit(struct module *mod, long code)
341{ 341{
342 module_put(mod); 342 module_put(mod);
343 do_exit(code); 343 do_exit(code);
@@ -1694,8 +1694,7 @@ static int module_add_modinfo_attrs(struct module *mod)
1694 1694
1695 temp_attr = mod->modinfo_attrs; 1695 temp_attr = mod->modinfo_attrs;
1696 for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) { 1696 for (i = 0; (attr = modinfo_attrs[i]) && !error; i++) {
1697 if (!attr->test || 1697 if (!attr->test || attr->test(mod)) {
1698 (attr->test && attr->test(mod))) {
1699 memcpy(temp_attr, attr, sizeof(*temp_attr)); 1698 memcpy(temp_attr, attr, sizeof(*temp_attr));
1700 sysfs_attr_init(&temp_attr->attr); 1699 sysfs_attr_init(&temp_attr->attr);
1701 error = sysfs_create_file(&mod->mkobj.kobj, 1700 error = sysfs_create_file(&mod->mkobj.kobj,
@@ -1859,10 +1858,11 @@ static void mod_sysfs_teardown(struct module *mod)
1859 * from modification and any data from execution. 1858 * from modification and any data from execution.
1860 * 1859 *
1861 * General layout of module is: 1860 * General layout of module is:
1862 * [text] [read-only-data] [writable data] 1861 * [text] [read-only-data] [ro-after-init] [writable data]
1863 * text_size -----^ ^ ^ 1862 * text_size -----^ ^ ^ ^
1864 * ro_size ------------------------| | 1863 * ro_size ------------------------| | |
1865 * size -------------------------------------------| 1864 * ro_after_init_size -----------------------------| |
1865 * size -----------------------------------------------------------|
1866 * 1866 *
1867 * These values are always page-aligned (as is base) 1867 * These values are always page-aligned (as is base)
1868 */ 1868 */
@@ -1885,14 +1885,24 @@ static void frob_rodata(const struct module_layout *layout,
1885 (layout->ro_size - layout->text_size) >> PAGE_SHIFT); 1885 (layout->ro_size - layout->text_size) >> PAGE_SHIFT);
1886} 1886}
1887 1887
1888static void frob_ro_after_init(const struct module_layout *layout,
1889 int (*set_memory)(unsigned long start, int num_pages))
1890{
1891 BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
1892 BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1));
1893 BUG_ON((unsigned long)layout->ro_after_init_size & (PAGE_SIZE-1));
1894 set_memory((unsigned long)layout->base + layout->ro_size,
1895 (layout->ro_after_init_size - layout->ro_size) >> PAGE_SHIFT);
1896}
1897
1888static void frob_writable_data(const struct module_layout *layout, 1898static void frob_writable_data(const struct module_layout *layout,
1889 int (*set_memory)(unsigned long start, int num_pages)) 1899 int (*set_memory)(unsigned long start, int num_pages))
1890{ 1900{
1891 BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1)); 1901 BUG_ON((unsigned long)layout->base & (PAGE_SIZE-1));
1892 BUG_ON((unsigned long)layout->ro_size & (PAGE_SIZE-1)); 1902 BUG_ON((unsigned long)layout->ro_after_init_size & (PAGE_SIZE-1));
1893 BUG_ON((unsigned long)layout->size & (PAGE_SIZE-1)); 1903 BUG_ON((unsigned long)layout->size & (PAGE_SIZE-1));
1894 set_memory((unsigned long)layout->base + layout->ro_size, 1904 set_memory((unsigned long)layout->base + layout->ro_after_init_size,
1895 (layout->size - layout->ro_size) >> PAGE_SHIFT); 1905 (layout->size - layout->ro_after_init_size) >> PAGE_SHIFT);
1896} 1906}
1897 1907
1898/* livepatching wants to disable read-only so it can frob module. */ 1908/* livepatching wants to disable read-only so it can frob module. */
@@ -1900,21 +1910,26 @@ void module_disable_ro(const struct module *mod)
1900{ 1910{
1901 frob_text(&mod->core_layout, set_memory_rw); 1911 frob_text(&mod->core_layout, set_memory_rw);
1902 frob_rodata(&mod->core_layout, set_memory_rw); 1912 frob_rodata(&mod->core_layout, set_memory_rw);
1913 frob_ro_after_init(&mod->core_layout, set_memory_rw);
1903 frob_text(&mod->init_layout, set_memory_rw); 1914 frob_text(&mod->init_layout, set_memory_rw);
1904 frob_rodata(&mod->init_layout, set_memory_rw); 1915 frob_rodata(&mod->init_layout, set_memory_rw);
1905} 1916}
1906 1917
1907void module_enable_ro(const struct module *mod) 1918void module_enable_ro(const struct module *mod, bool after_init)
1908{ 1919{
1909 frob_text(&mod->core_layout, set_memory_ro); 1920 frob_text(&mod->core_layout, set_memory_ro);
1910 frob_rodata(&mod->core_layout, set_memory_ro); 1921 frob_rodata(&mod->core_layout, set_memory_ro);
1911 frob_text(&mod->init_layout, set_memory_ro); 1922 frob_text(&mod->init_layout, set_memory_ro);
1912 frob_rodata(&mod->init_layout, set_memory_ro); 1923 frob_rodata(&mod->init_layout, set_memory_ro);
1924
1925 if (after_init)
1926 frob_ro_after_init(&mod->core_layout, set_memory_ro);
1913} 1927}
1914 1928
1915static void module_enable_nx(const struct module *mod) 1929static void module_enable_nx(const struct module *mod)
1916{ 1930{
1917 frob_rodata(&mod->core_layout, set_memory_nx); 1931 frob_rodata(&mod->core_layout, set_memory_nx);
1932 frob_ro_after_init(&mod->core_layout, set_memory_nx);
1918 frob_writable_data(&mod->core_layout, set_memory_nx); 1933 frob_writable_data(&mod->core_layout, set_memory_nx);
1919 frob_rodata(&mod->init_layout, set_memory_nx); 1934 frob_rodata(&mod->init_layout, set_memory_nx);
1920 frob_writable_data(&mod->init_layout, set_memory_nx); 1935 frob_writable_data(&mod->init_layout, set_memory_nx);
@@ -1923,6 +1938,7 @@ static void module_enable_nx(const struct module *mod)
1923static void module_disable_nx(const struct module *mod) 1938static void module_disable_nx(const struct module *mod)
1924{ 1939{
1925 frob_rodata(&mod->core_layout, set_memory_x); 1940 frob_rodata(&mod->core_layout, set_memory_x);
1941 frob_ro_after_init(&mod->core_layout, set_memory_x);
1926 frob_writable_data(&mod->core_layout, set_memory_x); 1942 frob_writable_data(&mod->core_layout, set_memory_x);
1927 frob_rodata(&mod->init_layout, set_memory_x); 1943 frob_rodata(&mod->init_layout, set_memory_x);
1928 frob_writable_data(&mod->init_layout, set_memory_x); 1944 frob_writable_data(&mod->init_layout, set_memory_x);
@@ -1965,6 +1981,8 @@ static void disable_ro_nx(const struct module_layout *layout)
1965 frob_text(layout, set_memory_rw); 1981 frob_text(layout, set_memory_rw);
1966 frob_rodata(layout, set_memory_rw); 1982 frob_rodata(layout, set_memory_rw);
1967 frob_rodata(layout, set_memory_x); 1983 frob_rodata(layout, set_memory_x);
1984 frob_ro_after_init(layout, set_memory_rw);
1985 frob_ro_after_init(layout, set_memory_x);
1968 frob_writable_data(layout, set_memory_x); 1986 frob_writable_data(layout, set_memory_x);
1969} 1987}
1970 1988
@@ -2307,6 +2325,7 @@ static void layout_sections(struct module *mod, struct load_info *info)
2307 * finder in the two loops below */ 2325 * finder in the two loops below */
2308 { SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL }, 2326 { SHF_EXECINSTR | SHF_ALLOC, ARCH_SHF_SMALL },
2309 { SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL }, 2327 { SHF_ALLOC, SHF_WRITE | ARCH_SHF_SMALL },
2328 { SHF_RO_AFTER_INIT | SHF_ALLOC, ARCH_SHF_SMALL },
2310 { SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL }, 2329 { SHF_WRITE | SHF_ALLOC, ARCH_SHF_SMALL },
2311 { ARCH_SHF_SMALL | SHF_ALLOC, 0 } 2330 { ARCH_SHF_SMALL | SHF_ALLOC, 0 }
2312 }; 2331 };
@@ -2338,7 +2357,11 @@ static void layout_sections(struct module *mod, struct load_info *info)
2338 mod->core_layout.size = debug_align(mod->core_layout.size); 2357 mod->core_layout.size = debug_align(mod->core_layout.size);
2339 mod->core_layout.ro_size = mod->core_layout.size; 2358 mod->core_layout.ro_size = mod->core_layout.size;
2340 break; 2359 break;
2341 case 3: /* whole core */ 2360 case 2: /* RO after init */
2361 mod->core_layout.size = debug_align(mod->core_layout.size);
2362 mod->core_layout.ro_after_init_size = mod->core_layout.size;
2363 break;
2364 case 4: /* whole core */
2342 mod->core_layout.size = debug_align(mod->core_layout.size); 2365 mod->core_layout.size = debug_align(mod->core_layout.size);
2343 break; 2366 break;
2344 } 2367 }
@@ -2368,7 +2391,14 @@ static void layout_sections(struct module *mod, struct load_info *info)
2368 mod->init_layout.size = debug_align(mod->init_layout.size); 2391 mod->init_layout.size = debug_align(mod->init_layout.size);
2369 mod->init_layout.ro_size = mod->init_layout.size; 2392 mod->init_layout.ro_size = mod->init_layout.size;
2370 break; 2393 break;
2371 case 3: /* whole init */ 2394 case 2:
2395 /*
2396 * RO after init doesn't apply to init_layout (only
2397 * core_layout), so it just takes the value of ro_size.
2398 */
2399 mod->init_layout.ro_after_init_size = mod->init_layout.ro_size;
2400 break;
2401 case 4: /* whole init */
2372 mod->init_layout.size = debug_align(mod->init_layout.size); 2402 mod->init_layout.size = debug_align(mod->init_layout.size);
2373 break; 2403 break;
2374 } 2404 }
@@ -2688,13 +2718,18 @@ static inline void kmemleak_load_module(const struct module *mod,
2688#endif 2718#endif
2689 2719
2690#ifdef CONFIG_MODULE_SIG 2720#ifdef CONFIG_MODULE_SIG
2691static int module_sig_check(struct load_info *info) 2721static int module_sig_check(struct load_info *info, int flags)
2692{ 2722{
2693 int err = -ENOKEY; 2723 int err = -ENOKEY;
2694 const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1; 2724 const unsigned long markerlen = sizeof(MODULE_SIG_STRING) - 1;
2695 const void *mod = info->hdr; 2725 const void *mod = info->hdr;
2696 2726
2697 if (info->len > markerlen && 2727 /*
2728 * Require flags == 0, as a module with version information
2729 * removed is no longer the module that was signed
2730 */
2731 if (flags == 0 &&
2732 info->len > markerlen &&
2698 memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) { 2733 memcmp(mod + info->len - markerlen, MODULE_SIG_STRING, markerlen) == 0) {
2699 /* We truncate the module to discard the signature */ 2734 /* We truncate the module to discard the signature */
2700 info->len -= markerlen; 2735 info->len -= markerlen;
@@ -2713,7 +2748,7 @@ static int module_sig_check(struct load_info *info)
2713 return err; 2748 return err;
2714} 2749}
2715#else /* !CONFIG_MODULE_SIG */ 2750#else /* !CONFIG_MODULE_SIG */
2716static int module_sig_check(struct load_info *info) 2751static int module_sig_check(struct load_info *info, int flags)
2717{ 2752{
2718 return 0; 2753 return 0;
2719} 2754}
@@ -2921,8 +2956,12 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
2921 return -ENOEXEC; 2956 return -ENOEXEC;
2922 } 2957 }
2923 2958
2924 if (!get_modinfo(info, "intree")) 2959 if (!get_modinfo(info, "intree")) {
2960 if (!test_taint(TAINT_OOT_MODULE))
2961 pr_warn("%s: loading out-of-tree module taints kernel.\n",
2962 mod->name);
2925 add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK); 2963 add_taint_module(mod, TAINT_OOT_MODULE, LOCKDEP_STILL_OK);
2964 }
2926 2965
2927 if (get_modinfo(info, "staging")) { 2966 if (get_modinfo(info, "staging")) {
2928 add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK); 2967 add_taint_module(mod, TAINT_CRAP, LOCKDEP_STILL_OK);
@@ -3091,6 +3130,8 @@ static int move_module(struct module *mod, struct load_info *info)
3091 3130
3092static int check_module_license_and_versions(struct module *mod) 3131static int check_module_license_and_versions(struct module *mod)
3093{ 3132{
3133 int prev_taint = test_taint(TAINT_PROPRIETARY_MODULE);
3134
3094 /* 3135 /*
3095 * ndiswrapper is under GPL by itself, but loads proprietary modules. 3136 * ndiswrapper is under GPL by itself, but loads proprietary modules.
3096 * Don't use add_taint_module(), as it would prevent ndiswrapper from 3137 * Don't use add_taint_module(), as it would prevent ndiswrapper from
@@ -3109,6 +3150,9 @@ static int check_module_license_and_versions(struct module *mod)
3109 add_taint_module(mod, TAINT_PROPRIETARY_MODULE, 3150 add_taint_module(mod, TAINT_PROPRIETARY_MODULE,
3110 LOCKDEP_NOW_UNRELIABLE); 3151 LOCKDEP_NOW_UNRELIABLE);
3111 3152
3153 if (!prev_taint && test_taint(TAINT_PROPRIETARY_MODULE))
3154 pr_warn("%s: module license taints kernel.\n", mod->name);
3155
3112#ifdef CONFIG_MODVERSIONS 3156#ifdef CONFIG_MODVERSIONS
3113 if ((mod->num_syms && !mod->crcs) 3157 if ((mod->num_syms && !mod->crcs)
3114 || (mod->num_gpl_syms && !mod->gpl_crcs) 3158 || (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -3156,16 +3200,41 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
3156 return 0; 3200 return 0;
3157} 3201}
3158 3202
3203/* module_blacklist is a comma-separated list of module names */
3204static char *module_blacklist;
3205static bool blacklisted(char *module_name)
3206{
3207 const char *p;
3208 size_t len;
3209
3210 if (!module_blacklist)
3211 return false;
3212
3213 for (p = module_blacklist; *p; p += len) {
3214 len = strcspn(p, ",");
3215 if (strlen(module_name) == len && !memcmp(module_name, p, len))
3216 return true;
3217 if (p[len] == ',')
3218 len++;
3219 }
3220 return false;
3221}
3222core_param(module_blacklist, module_blacklist, charp, 0400);
3223
3159static struct module *layout_and_allocate(struct load_info *info, int flags) 3224static struct module *layout_and_allocate(struct load_info *info, int flags)
3160{ 3225{
3161 /* Module within temporary copy. */ 3226 /* Module within temporary copy. */
3162 struct module *mod; 3227 struct module *mod;
3228 unsigned int ndx;
3163 int err; 3229 int err;
3164 3230
3165 mod = setup_load_info(info, flags); 3231 mod = setup_load_info(info, flags);
3166 if (IS_ERR(mod)) 3232 if (IS_ERR(mod))
3167 return mod; 3233 return mod;
3168 3234
3235 if (blacklisted(mod->name))
3236 return ERR_PTR(-EPERM);
3237
3169 err = check_modinfo(mod, info, flags); 3238 err = check_modinfo(mod, info, flags);
3170 if (err) 3239 if (err)
3171 return ERR_PTR(err); 3240 return ERR_PTR(err);
@@ -3179,6 +3248,15 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
3179 /* We will do a special allocation for per-cpu sections later. */ 3248 /* We will do a special allocation for per-cpu sections later. */
3180 info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC; 3249 info->sechdrs[info->index.pcpu].sh_flags &= ~(unsigned long)SHF_ALLOC;
3181 3250
3251 /*
3252 * Mark ro_after_init section with SHF_RO_AFTER_INIT so that
3253 * layout_sections() can put it in the right place.
3254 * Note: ro_after_init sections also have SHF_{WRITE,ALLOC} set.
3255 */
3256 ndx = find_sec(info, ".data..ro_after_init");
3257 if (ndx)
3258 info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT;
3259
3182 /* Determine total sizes, and put offsets in sh_entsize. For now 3260 /* Determine total sizes, and put offsets in sh_entsize. For now
3183 this is done generically; there doesn't appear to be any 3261 this is done generically; there doesn't appear to be any
3184 special cases for the architectures. */ 3262 special cases for the architectures. */
@@ -3345,12 +3423,14 @@ static noinline int do_init_module(struct module *mod)
3345 /* Switch to core kallsyms now init is done: kallsyms may be walking! */ 3423 /* Switch to core kallsyms now init is done: kallsyms may be walking! */
3346 rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms); 3424 rcu_assign_pointer(mod->kallsyms, &mod->core_kallsyms);
3347#endif 3425#endif
3426 module_enable_ro(mod, true);
3348 mod_tree_remove_init(mod); 3427 mod_tree_remove_init(mod);
3349 disable_ro_nx(&mod->init_layout); 3428 disable_ro_nx(&mod->init_layout);
3350 module_arch_freeing_init(mod); 3429 module_arch_freeing_init(mod);
3351 mod->init_layout.base = NULL; 3430 mod->init_layout.base = NULL;
3352 mod->init_layout.size = 0; 3431 mod->init_layout.size = 0;
3353 mod->init_layout.ro_size = 0; 3432 mod->init_layout.ro_size = 0;
3433 mod->init_layout.ro_after_init_size = 0;
3354 mod->init_layout.text_size = 0; 3434 mod->init_layout.text_size = 0;
3355 /* 3435 /*
3356 * We want to free module_init, but be aware that kallsyms may be 3436 * We want to free module_init, but be aware that kallsyms may be
@@ -3442,8 +3522,7 @@ static int complete_formation(struct module *mod, struct load_info *info)
3442 /* This relies on module_mutex for list integrity. */ 3522 /* This relies on module_mutex for list integrity. */
3443 module_bug_finalize(info->hdr, info->sechdrs, mod); 3523 module_bug_finalize(info->hdr, info->sechdrs, mod);
3444 3524
3445 /* Set RO and NX regions */ 3525 module_enable_ro(mod, false);
3446 module_enable_ro(mod);
3447 module_enable_nx(mod); 3526 module_enable_nx(mod);
3448 3527
3449 /* Mark state as coming so strong_try_module_get() ignores us, 3528 /* Mark state as coming so strong_try_module_get() ignores us,
@@ -3499,7 +3578,7 @@ static int load_module(struct load_info *info, const char __user *uargs,
3499 long err; 3578 long err;
3500 char *after_dashes; 3579 char *after_dashes;
3501 3580
3502 err = module_sig_check(info); 3581 err = module_sig_check(info, flags);
3503 if (err) 3582 if (err)
3504 goto free_copy; 3583 goto free_copy;
3505 3584
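The blacklisted() helper added above walks the comma-separated module_blacklist string without modifying it, using strcspn() to measure each token. The same loop as a stand-alone sketch; the test strings in main() are just example input:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool blacklisted(const char *blacklist, const char *module_name)
{
	const char *p;
	size_t len;

	if (!blacklist)
		return false;

	for (p = blacklist; *p; p += len) {
		len = strcspn(p, ",");          /* length of this token */
		if (strlen(module_name) == len && !memcmp(module_name, p, len))
			return true;
		if (p[len] == ',')              /* step over the separator */
			len++;
	}
	return false;
}

int main(void)
{
	const char *list = "nouveau,pcspkr";

	printf("%d %d\n", blacklisted(list, "pcspkr"),  /* 1 */
			  blacklisted(list, "pcs"));    /* 0: no prefix match */
	return 0;
}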
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index a881c6a7ba74..33c79b6105c5 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -300,12 +300,12 @@ static int create_image(int platform_mode)
300 save_processor_state(); 300 save_processor_state();
301 trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true); 301 trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, true);
302 error = swsusp_arch_suspend(); 302 error = swsusp_arch_suspend();
303 /* Restore control flow magically appears here */
304 restore_processor_state();
303 trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false); 305 trace_suspend_resume(TPS("machine_suspend"), PM_EVENT_HIBERNATE, false);
304 if (error) 306 if (error)
305 printk(KERN_ERR "PM: Error %d creating hibernation image\n", 307 printk(KERN_ERR "PM: Error %d creating hibernation image\n",
306 error); 308 error);
307 /* Restore control flow magically appears here */
308 restore_processor_state();
309 if (!in_suspend) 309 if (!in_suspend)
310 events_check_enabled = false; 310 events_check_enabled = false;
311 311
diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
index 5d4505f30083..7fd2838fa417 100644
--- a/kernel/printk/internal.h
+++ b/kernel/printk/internal.h
@@ -16,11 +16,9 @@
16 */ 16 */
17#include <linux/percpu.h> 17#include <linux/percpu.h>
18 18
19typedef __printf(2, 0) int (*printk_func_t)(int level, const char *fmt, 19typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args);
20 va_list args);
21 20
22__printf(2, 0) 21int __printf(1, 0) vprintk_default(const char *fmt, va_list args);
23int vprintk_default(int level, const char *fmt, va_list args);
24 22
25#ifdef CONFIG_PRINTK_NMI 23#ifdef CONFIG_PRINTK_NMI
26 24
@@ -33,10 +31,9 @@ extern raw_spinlock_t logbuf_lock;
33 * via per-CPU variable. 31 * via per-CPU variable.
34 */ 32 */
35DECLARE_PER_CPU(printk_func_t, printk_func); 33DECLARE_PER_CPU(printk_func_t, printk_func);
36__printf(2, 0) 34static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
37static inline int vprintk_func(int level, const char *fmt, va_list args)
38{ 35{
39 return this_cpu_read(printk_func)(level, fmt, args); 36 return this_cpu_read(printk_func)(fmt, args);
40} 37}
41 38
42extern atomic_t nmi_message_lost; 39extern atomic_t nmi_message_lost;
@@ -47,10 +44,9 @@ static inline int get_nmi_message_lost(void)
47 44
48#else /* CONFIG_PRINTK_NMI */ 45#else /* CONFIG_PRINTK_NMI */
49 46
50__printf(2, 0) 47static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args)
51static inline int vprintk_func(int level, const char *fmt, va_list args)
52{ 48{
53 return vprintk_default(level, fmt, args); 49 return vprintk_default(fmt, args);
54} 50}
55 51
56static inline int get_nmi_message_lost(void) 52static inline int get_nmi_message_lost(void)
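After this change vprintk_func() takes only fmt/args and dispatches through the per-CPU printk_func pointer, which is either vprintk_default() or the NMI-safe writer. A user-space sketch of that indirection, assuming a thread-local variable as the closest analogue of a per-CPU one; the names are illustrative:

#include <stdarg.h>
#include <stdio.h>

typedef int (*printk_func_t)(const char *fmt, va_list args);

static int vprintk_default_like(const char *fmt, va_list args)
{
	return vprintf(fmt, args);              /* the normal path */
}

/* Per-CPU in the kernel; a thread-local is the nearest user-space analogue. */
static _Thread_local printk_func_t printk_func = vprintk_default_like;

static int printk_like(const char *fmt, ...)
{
	va_list args;
	int r;

	va_start(args, fmt);
	r = printk_func(fmt, args);             /* may be an NMI-safe handler */
	va_end(args);
	return r;
}

int main(void)
{
	printk_like("printk via the %s handler\n", "default");
	return 0;
}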
diff --git a/kernel/printk/nmi.c b/kernel/printk/nmi.c
index bc3eeb1ae6da..b69eb8a2876f 100644
--- a/kernel/printk/nmi.c
+++ b/kernel/printk/nmi.c
@@ -58,7 +58,7 @@ static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);
58 * one writer running. But the buffer might get flushed from another 58 * one writer running. But the buffer might get flushed from another
59 * CPU, so we need to be careful. 59 * CPU, so we need to be careful.
60 */ 60 */
61static int vprintk_nmi(int level, const char *fmt, va_list args) 61static int vprintk_nmi(const char *fmt, va_list args)
62{ 62{
63 struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); 63 struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
64 int add = 0; 64 int add = 0;
@@ -79,16 +79,7 @@ again:
79 if (!len) 79 if (!len)
80 smp_rmb(); 80 smp_rmb();
81 81
82 if (level != LOGLEVEL_DEFAULT) { 82 add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len, fmt, args);
83 add = snprintf(s->buffer + len, sizeof(s->buffer) - len,
84 KERN_SOH "%c", '0' + level);
85 add += vsnprintf(s->buffer + len + add,
86 sizeof(s->buffer) - len - add,
87 fmt, args);
88 } else {
89 add = vsnprintf(s->buffer + len, sizeof(s->buffer) - len,
90 fmt, args);
91 }
92 83
93 /* 84 /*
94 * Do it once again if the buffer has been flushed in the meantime. 85 * Do it once again if the buffer has been flushed in the meantime.
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index a5ef95ca18c9..eea6dbc2d8cf 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1930,28 +1930,7 @@ asmlinkage int printk_emit(int facility, int level,
1930} 1930}
1931EXPORT_SYMBOL(printk_emit); 1931EXPORT_SYMBOL(printk_emit);
1932 1932
1933#ifdef CONFIG_PRINTK 1933int vprintk_default(const char *fmt, va_list args)
1934#define define_pr_level(func, loglevel) \
1935asmlinkage __visible void func(const char *fmt, ...) \
1936{ \
1937 va_list args; \
1938 \
1939 va_start(args, fmt); \
1940 vprintk_default(loglevel, fmt, args); \
1941 va_end(args); \
1942} \
1943EXPORT_SYMBOL(func)
1944
1945define_pr_level(__pr_emerg, LOGLEVEL_EMERG);
1946define_pr_level(__pr_alert, LOGLEVEL_ALERT);
1947define_pr_level(__pr_crit, LOGLEVEL_CRIT);
1948define_pr_level(__pr_err, LOGLEVEL_ERR);
1949define_pr_level(__pr_warn, LOGLEVEL_WARNING);
1950define_pr_level(__pr_notice, LOGLEVEL_NOTICE);
1951define_pr_level(__pr_info, LOGLEVEL_INFO);
1952#endif
1953
1954int vprintk_default(int level, const char *fmt, va_list args)
1955{ 1934{
1956 int r; 1935 int r;
1957 1936
@@ -1961,7 +1940,7 @@ int vprintk_default(int level, const char *fmt, va_list args)
1961 return r; 1940 return r;
1962 } 1941 }
1963#endif 1942#endif
1964 r = vprintk_emit(0, level, NULL, 0, fmt, args); 1943 r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args);
1965 1944
1966 return r; 1945 return r;
1967} 1946}
@@ -1994,7 +1973,7 @@ asmlinkage __visible int printk(const char *fmt, ...)
1994 int r; 1973 int r;
1995 1974
1996 va_start(args, fmt); 1975 va_start(args, fmt);
1997 r = vprintk_func(LOGLEVEL_DEFAULT, fmt, args); 1976 r = vprintk_func(fmt, args);
1998 va_end(args); 1977 va_end(args);
1999 1978
2000 return r; 1979 return r;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index d49bfa1e53e6..1d3b7665d0be 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -585,8 +585,8 @@ static int ptrace_setoptions(struct task_struct *child, unsigned long data)
585 return -EINVAL; 585 return -EINVAL;
586 586
587 if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { 587 if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) {
588 if (!config_enabled(CONFIG_CHECKPOINT_RESTORE) || 588 if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) ||
589 !config_enabled(CONFIG_SECCOMP)) 589 !IS_ENABLED(CONFIG_SECCOMP))
590 return -EINVAL; 590 return -EINVAL;
591 591
592 if (!capable(CAP_SYS_ADMIN)) 592 if (!capable(CAP_SYS_ADMIN))
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5c883fe8e440..2a906f20fba7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -74,6 +74,7 @@
74#include <linux/context_tracking.h> 74#include <linux/context_tracking.h>
75#include <linux/compiler.h> 75#include <linux/compiler.h>
76#include <linux/frame.h> 76#include <linux/frame.h>
77#include <linux/prefetch.h>
77 78
78#include <asm/switch_to.h> 79#include <asm/switch_to.h>
79#include <asm/tlb.h> 80#include <asm/tlb.h>
@@ -2972,6 +2973,23 @@ EXPORT_PER_CPU_SYMBOL(kstat);
2972EXPORT_PER_CPU_SYMBOL(kernel_cpustat); 2973EXPORT_PER_CPU_SYMBOL(kernel_cpustat);
2973 2974
2974/* 2975/*
2976 * The function fair_sched_class.update_curr accesses the struct curr
2977 * and its field curr->exec_start; when called from task_sched_runtime(),
2978 * we observe a high rate of cache misses in practice.
2979 * Prefetching this data results in improved performance.
2980 */
2981static inline void prefetch_curr_exec_start(struct task_struct *p)
2982{
2983#ifdef CONFIG_FAIR_GROUP_SCHED
2984 struct sched_entity *curr = (&p->se)->cfs_rq->curr;
2985#else
2986 struct sched_entity *curr = (&task_rq(p)->cfs)->curr;
2987#endif
2988 prefetch(curr);
2989 prefetch(&curr->exec_start);
2990}
2991
2992/*
2975 * Return accounted runtime for the task. 2993 * Return accounted runtime for the task.
2976 * In case the task is currently running, return the runtime plus current's 2994 * In case the task is currently running, return the runtime plus current's
2977 * pending runtime that have not been accounted yet. 2995 * pending runtime that have not been accounted yet.
@@ -3005,6 +3023,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
3005 * thread, breaking clock_gettime(). 3023 * thread, breaking clock_gettime().
3006 */ 3024 */
3007 if (task_current(rq, p) && task_on_rq_queued(p)) { 3025 if (task_current(rq, p) && task_on_rq_queued(p)) {
3026 prefetch_curr_exec_start(p);
3008 update_rq_clock(rq); 3027 update_rq_clock(rq);
3009 p->sched_class->update_curr(rq); 3028 p->sched_class->update_curr(rq);
3010 } 3029 }
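prefetch_curr_exec_start() issues prefetches for the current scheduling entity and its exec_start field so both cachelines are already inbound when update_curr() runs. A minimal sketch of the same idea, assuming the GCC/Clang __builtin_prefetch() that the kernel's prefetch() commonly expands to; the struct below is a made-up example, not the scheduler's:

struct entity {
	char earlier_fields[192];               /* likely on other cachelines */
	unsigned long long exec_start;          /* the field update_curr() reads */
};

static inline void prefetch_entity(const struct entity *curr)
{
	/* Read prefetches (rw = 0) with high temporal locality (3). */
	__builtin_prefetch(curr, 0, 3);
	__builtin_prefetch(&curr->exec_start, 0, 3);
}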
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 5be58820465c..d4184498c9f5 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -168,7 +168,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
168 168
169 if (old_idx == IDX_INVALID) { 169 if (old_idx == IDX_INVALID) {
170 cp->size++; 170 cp->size++;
171 cp->elements[cp->size - 1].dl = 0; 171 cp->elements[cp->size - 1].dl = dl;
172 cp->elements[cp->size - 1].cpu = cpu; 172 cp->elements[cp->size - 1].cpu = cpu;
173 cp->elements[cpu].idx = cp->size - 1; 173 cp->elements[cpu].idx = cp->size - 1;
174 cpudl_change_key(cp, cp->size - 1, dl); 174 cpudl_change_key(cp, cp->size - 1, dl);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 1934f658c036..9858266fb0b3 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -508,13 +508,21 @@ void account_process_tick(struct task_struct *p, int user_tick)
508 */ 508 */
509void account_idle_ticks(unsigned long ticks) 509void account_idle_ticks(unsigned long ticks)
510{ 510{
511 cputime_t cputime, steal;
511 512
512 if (sched_clock_irqtime) { 513 if (sched_clock_irqtime) {
513 irqtime_account_idle_ticks(ticks); 514 irqtime_account_idle_ticks(ticks);
514 return; 515 return;
515 } 516 }
516 517
517 account_idle_time(jiffies_to_cputime(ticks)); 518 cputime = jiffies_to_cputime(ticks);
519 steal = steal_account_process_time(cputime);
520
521 if (steal >= cputime)
522 return;
523
524 cputime -= steal;
525 account_idle_time(cputime);
518} 526}
519 527
520/* 528/*
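account_idle_ticks() now subtracts whatever time the hypervisor stole from the interval before accounting the rest as idle, and accounts nothing when steal covers the whole span. The arithmetic as a tiny stand-alone sketch, assuming plain integers in place of cputime_t and a stubbed steal-time source:

typedef unsigned long long cputime_t;

static cputime_t idle_time;                     /* what would go to CPUTIME_IDLE */

/* Stub: pretend the hypervisor reported 3 units stolen, capped at maxtime. */
static cputime_t steal_time(cputime_t maxtime)
{
	cputime_t stolen = 3;

	return stolen < maxtime ? stolen : maxtime;
}

static void account_idle(cputime_t ticks)
{
	cputime_t cputime = ticks;
	cputime_t steal = steal_time(cputime);

	if (steal >= cputime)
		return;                         /* the whole interval was stolen */

	idle_time += cputime - steal;
}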
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fcb7f0217ff4..1ce8867283dc 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -658,8 +658,11 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
658 * 658 *
659 * XXX figure out if select_task_rq_dl() deals with offline cpus. 659 * XXX figure out if select_task_rq_dl() deals with offline cpus.
660 */ 660 */
661 if (unlikely(!rq->online)) 661 if (unlikely(!rq->online)) {
662 lockdep_unpin_lock(&rq->lock, rf.cookie);
662 rq = dl_task_offline_migration(rq, p); 663 rq = dl_task_offline_migration(rq, p);
664 rf.cookie = lockdep_pin_lock(&rq->lock);
665 }
663 666
664 /* 667 /*
665 * Queueing this task back might have overloaded rq, check if we need 668 * Queueing this task back might have overloaded rq, check if we need
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4088eedea763..039de34f1521 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4269,7 +4269,7 @@ static void sync_throttle(struct task_group *tg, int cpu)
4269 pcfs_rq = tg->parent->cfs_rq[cpu]; 4269 pcfs_rq = tg->parent->cfs_rq[cpu];
4270 4270
4271 cfs_rq->throttle_count = pcfs_rq->throttle_count; 4271 cfs_rq->throttle_count = pcfs_rq->throttle_count;
4272 pcfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu)); 4272 cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
4273} 4273}
4274 4274
4275/* conditionally throttle active cfs_rq's from put_prev_entity() */ 4275/* conditionally throttle active cfs_rq's from put_prev_entity() */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 54d15eb2b701..ef6c6c3f9d8a 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -347,7 +347,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
347{ 347{
348 struct seccomp_filter *sfilter; 348 struct seccomp_filter *sfilter;
349 int ret; 349 int ret;
350 const bool save_orig = config_enabled(CONFIG_CHECKPOINT_RESTORE); 350 const bool save_orig = IS_ENABLED(CONFIG_CHECKPOINT_RESTORE);
351 351
352 if (fprog->len == 0 || fprog->len > BPF_MAXINSNS) 352 if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
353 return ERR_PTR(-EINVAL); 353 return ERR_PTR(-EINVAL);
@@ -542,7 +542,7 @@ void secure_computing_strict(int this_syscall)
542{ 542{
543 int mode = current->seccomp.mode; 543 int mode = current->seccomp.mode;
544 544
545 if (config_enabled(CONFIG_CHECKPOINT_RESTORE) && 545 if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
546 unlikely(current->ptrace & PT_SUSPEND_SECCOMP)) 546 unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
547 return; 547 return;
548 548
@@ -655,7 +655,7 @@ int __secure_computing(const struct seccomp_data *sd)
655 int mode = current->seccomp.mode; 655 int mode = current->seccomp.mode;
656 int this_syscall; 656 int this_syscall;
657 657
658 if (config_enabled(CONFIG_CHECKPOINT_RESTORE) && 658 if (IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) &&
659 unlikely(current->ptrace & PT_SUSPEND_SECCOMP)) 659 unlikely(current->ptrace & PT_SUSPEND_SECCOMP))
660 return 0; 660 return 0;
661 661
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 555670a5143c..32bf6f75a8fe 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1496,6 +1496,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
1496 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); 1496 struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
1497 u64 expires = KTIME_MAX; 1497 u64 expires = KTIME_MAX;
1498 unsigned long nextevt; 1498 unsigned long nextevt;
1499 bool is_max_delta;
1499 1500
1500 /* 1501 /*
1501 * Pretend that there is no timer pending if the cpu is offline. 1502 * Pretend that there is no timer pending if the cpu is offline.
@@ -1506,6 +1507,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
1506 1507
1507 spin_lock(&base->lock); 1508 spin_lock(&base->lock);
1508 nextevt = __next_timer_interrupt(base); 1509 nextevt = __next_timer_interrupt(base);
1510 is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA);
1509 base->next_expiry = nextevt; 1511 base->next_expiry = nextevt;
1510 /* 1512 /*
1511 * We have a fresh next event. Check whether we can forward the base: 1513 * We have a fresh next event. Check whether we can forward the base:
@@ -1519,7 +1521,8 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
1519 expires = basem; 1521 expires = basem;
1520 base->is_idle = false; 1522 base->is_idle = false;
1521 } else { 1523 } else {
1522 expires = basem + (nextevt - basej) * TICK_NSEC; 1524 if (!is_max_delta)
1525 expires = basem + (nextevt - basej) * TICK_NSEC;
1523 /* 1526 /*
1524 * If we expect to sleep more than a tick, mark the base idle: 1527 * If we expect to sleep more than a tick, mark the base idle:
1525 */ 1528 */
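The timer hunk converts nextevt into an absolute expiry only when it is not the no-timer-pending sentinel (base->clk + NEXT_TIMER_MAX_DELTA), so an idle CPU with nothing queued keeps expires at KTIME_MAX instead of a bogus far-future value. A small sketch of that guard, assuming simplified types and an illustrative sentinel constant:

#include <stdint.h>

#define TICK_NSEC            1000000ULL                /* e.g. 1 ms ticks */
#define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1)         /* illustrative value */
#define KTIME_MAX            INT64_MAX

static uint64_t next_event_ns(unsigned long basej, uint64_t basem,
			      unsigned long clk, unsigned long nextevt)
{
	uint64_t expires = KTIME_MAX;
	int is_max_delta = (nextevt == clk + NEXT_TIMER_MAX_DELTA);

	/* Only a genuinely pending timer yields a finite expiry time. */
	if (!is_max_delta)
		expires = basem + (uint64_t)(nextevt - basej) * TICK_NSEC;

	return expires;
}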
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index fb345cd11883..7598e6ca817a 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -776,7 +776,7 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
776 return; 776 return;
777 777
778 __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, 778 __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
779 bio_op(bio), bio->bi_rw, what, error, 0, NULL); 779 bio_op(bio), bio->bi_opf, what, error, 0, NULL);
780} 780}
781 781
782static void blk_add_trace_bio_bounce(void *ignore, 782static void blk_add_trace_bio_bounce(void *ignore,
@@ -881,7 +881,7 @@ static void blk_add_trace_split(void *ignore,
881 __be64 rpdu = cpu_to_be64(pdu); 881 __be64 rpdu = cpu_to_be64(pdu);
882 882
883 __blk_add_trace(bt, bio->bi_iter.bi_sector, 883 __blk_add_trace(bt, bio->bi_iter.bi_sector,
884 bio->bi_iter.bi_size, bio_op(bio), bio->bi_rw, 884 bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
885 BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu), 885 BLK_TA_SPLIT, bio->bi_error, sizeof(rpdu),
886 &rpdu); 886 &rpdu);
887 } 887 }
@@ -915,7 +915,7 @@ static void blk_add_trace_bio_remap(void *ignore,
915 r.sector_from = cpu_to_be64(from); 915 r.sector_from = cpu_to_be64(from);
916 916
917 __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, 917 __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
918 bio_op(bio), bio->bi_rw, BLK_TA_REMAP, bio->bi_error, 918 bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_error,
919 sizeof(r), &r); 919 sizeof(r), &r);
920} 920}
921 921