Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c              |   1
-rw-r--r--  kernel/bpf/verifier.c       | 108
-rw-r--r--  kernel/cpu.c                |   3
-rw-r--r--  kernel/events/core.c        |  33
-rw-r--r--  kernel/futex.c              |   4
-rw-r--r--  kernel/irq/chip.c           |  10
-rw-r--r--  kernel/irq/internals.h      |  10
-rw-r--r--  kernel/irq/manage.c         |  63
-rw-r--r--  kernel/irq/pm.c             |   2
-rw-r--r--  kernel/locking/rtmutex.c    |   1
-rw-r--r--  kernel/sched/cputime.c      |   6
-rw-r--r--  kernel/sched/deadline.c     |  14
-rw-r--r--  kernel/trace/ftrace.c       |  41
-rw-r--r--  kernel/trace/ring_buffer.c  |  10
-rw-r--r--  kernel/trace/trace.c        |   1
-rw-r--r--  kernel/trace/trace.h        |   6
16 files changed, 223 insertions(+), 90 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 833267bbd80b..6dd556931739 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -641,6 +641,7 @@ static int auditd_send_unicast_skb(struct sk_buff *skb)
 	ac = rcu_dereference(auditd_conn);
 	if (!ac) {
 		rcu_read_unlock();
+		kfree_skb(skb);
 		rc = -ECONNREFUSED;
 		goto err;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6a86723c5b64..af9e84a4944e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -504,6 +504,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
 {
 	regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
 	regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
+	regs[regno].value_from_signed = false;
 	regs[regno].min_align = 0;
 }
 
@@ -777,12 +778,13 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 	return -EACCES;
 }
 
-static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
+static bool __is_pointer_value(bool allow_ptr_leaks,
+			       const struct bpf_reg_state *reg)
 {
-	if (env->allow_ptr_leaks)
+	if (allow_ptr_leaks)
 		return false;
 
-	switch (env->cur_state.regs[regno].type) {
+	switch (reg->type) {
 	case UNKNOWN_VALUE:
 	case CONST_IMM:
 		return false;
@@ -791,6 +793,11 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 	}
 }
 
+static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
+{
+	return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
+}
+
 static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
 				   int off, int size, bool strict)
 {
@@ -1832,10 +1839,24 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 	dst_align = dst_reg->min_align;
 
 	/* We don't know anything about what was done to this register, mark it
-	 * as unknown.
+	 * as unknown. Also, if both derived bounds came from signed/unsigned
+	 * mixed compares and one side is unbounded, we cannot really do anything
+	 * with them as boundaries cannot be trusted. Thus, arithmetic of two
+	 * regs of such kind will get invalidated bounds on the dst side.
 	 */
-	if (min_val == BPF_REGISTER_MIN_RANGE &&
-	    max_val == BPF_REGISTER_MAX_RANGE) {
+	if ((min_val == BPF_REGISTER_MIN_RANGE &&
+	     max_val == BPF_REGISTER_MAX_RANGE) ||
+	    (BPF_SRC(insn->code) == BPF_X &&
+	     ((min_val != BPF_REGISTER_MIN_RANGE &&
+	       max_val == BPF_REGISTER_MAX_RANGE) ||
+	      (min_val == BPF_REGISTER_MIN_RANGE &&
+	       max_val != BPF_REGISTER_MAX_RANGE) ||
+	      (dst_reg->min_value != BPF_REGISTER_MIN_RANGE &&
+	       dst_reg->max_value == BPF_REGISTER_MAX_RANGE) ||
+	      (dst_reg->min_value == BPF_REGISTER_MIN_RANGE &&
+	       dst_reg->max_value != BPF_REGISTER_MAX_RANGE)) &&
+	     regs[insn->dst_reg].value_from_signed !=
+	     regs[insn->src_reg].value_from_signed)) {
 		reset_reg_range_values(regs, insn->dst_reg);
 		return;
 	}
@@ -2023,6 +2044,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 			regs[insn->dst_reg].max_value = insn->imm;
 			regs[insn->dst_reg].min_value = insn->imm;
 			regs[insn->dst_reg].min_align = calc_align(insn->imm);
+			regs[insn->dst_reg].value_from_signed = false;
 		}
 
 	} else if (opcode > BPF_END) {
@@ -2198,40 +2220,63 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 			    struct bpf_reg_state *false_reg, u64 val,
 			    u8 opcode)
 {
+	bool value_from_signed = true;
+	bool is_range = true;
+
 	switch (opcode) {
 	case BPF_JEQ:
 		/* If this is false then we know nothing Jon Snow, but if it is
 		 * true then we know for sure.
 		 */
 		true_reg->max_value = true_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JNE:
 		/* If this is true we know nothing Jon Snow, but if it is false
 		 * we know the value for sure;
 		 */
 		false_reg->max_value = false_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JGT:
-		/* Unsigned comparison, the minimum value is 0. */
-		false_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGT:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGT) {
+			/* Unsigned comparison, the minimum value is 0. */
+			false_reg->min_value = 0;
+		}
 		/* If this is false then we know the maximum val is val,
 		 * otherwise we know the min val is val+1.
 		 */
 		false_reg->max_value = val;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->min_value = val + 1;
+		true_reg->value_from_signed = value_from_signed;
 		break;
	case BPF_JGE:
-		/* Unsigned comparison, the minimum value is 0. */
-		false_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGE:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGE) {
+			/* Unsigned comparison, the minimum value is 0. */
+			false_reg->min_value = 0;
+		}
 		/* If this is false then we know the maximum value is val - 1,
 		 * otherwise we know the mimimum value is val.
 		 */
 		false_reg->max_value = val - 1;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->min_value = val;
+		true_reg->value_from_signed = value_from_signed;
 		break;
 	default:
 		break;
@@ -2239,6 +2284,12 @@ static void reg_set_min_max(struct bpf_reg_state *true_reg,
 
 	check_reg_overflow(false_reg);
 	check_reg_overflow(true_reg);
+	if (is_range) {
+		if (__is_pointer_value(false, false_reg))
+			reset_reg_range_values(false_reg, 0);
+		if (__is_pointer_value(false, true_reg))
+			reset_reg_range_values(true_reg, 0);
+	}
 }
 
 /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
@@ -2248,41 +2299,64 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 				struct bpf_reg_state *false_reg, u64 val,
 				u8 opcode)
 {
+	bool value_from_signed = true;
+	bool is_range = true;
+
 	switch (opcode) {
 	case BPF_JEQ:
 		/* If this is false then we know nothing Jon Snow, but if it is
 		 * true then we know for sure.
 		 */
 		true_reg->max_value = true_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JNE:
 		/* If this is true we know nothing Jon Snow, but if it is false
 		 * we know the value for sure;
 		 */
 		false_reg->max_value = false_reg->min_value = val;
+		is_range = false;
 		break;
 	case BPF_JGT:
-		/* Unsigned comparison, the minimum value is 0. */
-		true_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGT:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGT) {
+			/* Unsigned comparison, the minimum value is 0. */
+			true_reg->min_value = 0;
+		}
 		/*
 		 * If this is false, then the val is <= the register, if it is
 		 * true the register <= to the val.
 		 */
 		false_reg->min_value = val;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->max_value = val - 1;
+		true_reg->value_from_signed = value_from_signed;
 		break;
	case BPF_JGE:
-		/* Unsigned comparison, the minimum value is 0. */
-		true_reg->min_value = 0;
+		value_from_signed = false;
 		/* fallthrough */
 	case BPF_JSGE:
+		if (true_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(true_reg, 0);
+		if (false_reg->value_from_signed != value_from_signed)
+			reset_reg_range_values(false_reg, 0);
+		if (opcode == BPF_JGE) {
+			/* Unsigned comparison, the minimum value is 0. */
+			true_reg->min_value = 0;
+		}
 		/* If this is false then constant < register, if it is true then
 		 * the register < constant.
 		 */
 		false_reg->min_value = val + 1;
+		false_reg->value_from_signed = value_from_signed;
 		true_reg->max_value = val;
+		true_reg->value_from_signed = value_from_signed;
 		break;
 	default:
 		break;
@@ -2290,6 +2364,12 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 
 	check_reg_overflow(false_reg);
 	check_reg_overflow(true_reg);
+	if (is_range) {
+		if (__is_pointer_value(false, false_reg))
+			reset_reg_range_values(false_reg, 0);
+		if (__is_pointer_value(false, true_reg))
+			reset_reg_range_values(true_reg, 0);
+	}
 }
 
 static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
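
Note on the verifier change above: the new value_from_signed flag records whether a register's min/max bounds came from a signed or an unsigned compare, and the bounds are invalidated whenever the two are mixed or a pointer ends up in a range compare. A minimal user-space C sketch of the underlying hazard (values and names are illustrative, not taken from this commit): a value that passes a signed upper-bound check can still be enormous when the same bits are read as unsigned, so a bound proven for one signedness says nothing about the other.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Bit pattern with the sign bit set: small negative as s64, huge as u64. */
	uint64_t reg = 0xffffffffffffff00ULL;
	int64_t s = (int64_t)reg;

	if (s <= 64)	/* signed bound check passes: -256 <= 64 */
		printf("signed view:   %lld (bound <= 64 holds)\n", (long long)s);

	/* ...but the unsigned magnitude, which is what offset/size checks care
	 * about, is nowhere near that bound.
	 */
	printf("unsigned view: %llu\n", (unsigned long long)reg);
	return 0;
}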
diff --git a/kernel/cpu.c b/kernel/cpu.c
index ab860453841d..eee033134262 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -279,7 +279,8 @@ static int bringup_wait_for_ap(unsigned int cpu)
 
 	/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
 	wait_for_completion(&st->done);
-	BUG_ON(!cpu_online(cpu));
+	if (WARN_ON_ONCE((!cpu_online(cpu))))
+		return -ECANCELED;
 
 	/* Unpark the stopper thread and the hotplug thread of the target cpu */
 	stop_machine_unpark(cpu);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1538df9b2b65..426c2ffba16d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1452,6 +1452,13 @@ static enum event_type_t get_event_type(struct perf_event *event)
 
 	lockdep_assert_held(&ctx->lock);
 
+	/*
+	 * It's 'group type', really, because if our group leader is
+	 * pinned, so are we.
+	 */
+	if (event->group_leader != event)
+		event = event->group_leader;
+
 	event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
 	if (!ctx->task)
 		event_type |= EVENT_CPU;
@@ -4378,7 +4385,9 @@ EXPORT_SYMBOL_GPL(perf_event_read_value);
 static int __perf_read_group_add(struct perf_event *leader,
 					u64 read_format, u64 *values)
 {
+	struct perf_event_context *ctx = leader->ctx;
 	struct perf_event *sub;
+	unsigned long flags;
 	int n = 1; /* skip @nr */
 	int ret;
 
@@ -4408,12 +4417,15 @@ static int __perf_read_group_add(struct perf_event *leader,
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
 
+	raw_spin_lock_irqsave(&ctx->lock, flags);
+
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		values[n++] += perf_event_count(sub);
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
 	}
 
+	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 	return 0;
 }
 
@@ -7321,21 +7333,6 @@ int perf_event_account_interrupt(struct perf_event *event)
 	return __perf_event_account_interrupt(event, 1);
 }
 
-static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
-{
-	/*
-	 * Due to interrupt latency (AKA "skid"), we may enter the
-	 * kernel before taking an overflow, even if the PMU is only
-	 * counting user events.
-	 * To avoid leaking information to userspace, we must always
-	 * reject kernel samples when exclude_kernel is set.
-	 */
-	if (event->attr.exclude_kernel && !user_mode(regs))
-		return false;
-
-	return true;
-}
-
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7357,12 +7354,6 @@ static int __perf_event_overflow(struct perf_event *event,
 	ret = __perf_event_account_interrupt(event, throttle);
 
 	/*
-	 * For security, drop the skid kernel samples if necessary.
-	 */
-	if (!sample_is_allowed(event, regs))
-		return ret;
-
-	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * events
 	 */
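
The __perf_read_group_add() hunk takes ctx->lock around the sibling walk so a group read returns one consistent snapshot of leader and sibling counts. For context, this is the read format such a group read serves; a hedged user-space sketch of consuming it (counter choice and error handling kept minimal, not part of this commit):

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* read() layout with PERF_FORMAT_GROUP | PERF_FORMAT_ID: nr, then {value, id} pairs. */
struct group_read {
	uint64_t nr;
	struct { uint64_t value, id; } cnt[2];
};

static int open_counter(uint64_t config, int group_fd)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = config;
	attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
	attr.disabled = (group_fd == -1);	/* leader starts disabled */

	return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
}

int main(void)
{
	int leader  = open_counter(PERF_COUNT_HW_CPU_CYCLES, -1);
	int sibling = open_counter(PERF_COUNT_HW_INSTRUCTIONS, leader);
	struct group_read gr;
	volatile unsigned long i, acc = 0;

	if (leader < 0 || sibling < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(leader, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
	for (i = 0; i < 10000000; i++)
		acc += i;
	ioctl(leader, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

	/* One read() returns the whole group: leader first, then siblings. */
	if (read(leader, &gr, sizeof(gr)) > 0)
		printf("cycles=%llu instructions=%llu\n",
		       (unsigned long long)gr.cnt[0].value,
		       (unsigned long long)gr.cnt[1].value);
	return 0;
}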
diff --git a/kernel/futex.c b/kernel/futex.c
index c934689043b2..16dbe4c93895 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -212,7 +212,7 @@ struct futex_pi_state {
 	atomic_t refcount;
 
 	union futex_key key;
-};
+} __randomize_layout;
 
 /**
  * struct futex_q - The hashed futex queue entry, one per waiting task
@@ -246,7 +246,7 @@ struct futex_q {
 	struct rt_mutex_waiter *rt_waiter;
 	union futex_key *requeue_pi_key;
 	u32 bitset;
-};
+} __randomize_layout;
 
 static const struct futex_q futex_q_init = {
 	/* list gets initialized in queue_me()*/
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index d171bc57e1e0..a3cc37c0c85e 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -170,21 +170,11 @@ static void irq_state_clr_disabled(struct irq_desc *desc)
 	irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED);
 }
 
-static void irq_state_set_disabled(struct irq_desc *desc)
-{
-	irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
-}
-
 static void irq_state_clr_masked(struct irq_desc *desc)
 {
 	irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED);
 }
 
-static void irq_state_set_masked(struct irq_desc *desc)
-{
-	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
-}
-
 static void irq_state_clr_started(struct irq_desc *desc)
 {
 	irqd_clear(&desc->irq_data, IRQD_IRQ_STARTED);
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index dbfba9933ed2..a2c48058354c 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -227,6 +227,16 @@ static inline bool irqd_has_set(struct irq_data *d, unsigned int mask)
 	return __irqd_to_state(d) & mask;
 }
 
+static inline void irq_state_set_disabled(struct irq_desc *desc)
+{
+	irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
+}
+
+static inline void irq_state_set_masked(struct irq_desc *desc)
+{
+	irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
+}
+
 #undef __irqd_to_state
 
 static inline void kstat_incr_irqs_this_cpu(struct irq_desc *desc)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 5624b2dd6b58..1d1a5b945ab4 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1090,6 +1090,16 @@ setup_irq_thread(struct irqaction *new, unsigned int irq, bool secondary)
 /*
 * Internal function to register an irqaction - typically used to
 * allocate special interrupts that are part of the architecture.
+ *
+ * Locking rules:
+ *
+ * desc->request_mutex	Provides serialization against a concurrent free_irq()
+ *   chip_bus_lock	Provides serialization for slow bus operations
+ *     desc->lock	Provides serialization against hard interrupts
+ *
+ * chip_bus_lock and desc->lock are sufficient for all other management and
+ * interrupt related functions. desc->request_mutex solely serializes
+ * request/free_irq().
 */
static int
__setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
@@ -1167,20 +1177,35 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	if (desc->irq_data.chip->flags & IRQCHIP_ONESHOT_SAFE)
 		new->flags &= ~IRQF_ONESHOT;
 
+	/*
+	 * Protects against a concurrent __free_irq() call which might wait
+	 * for synchronize_irq() to complete without holding the optional
+	 * chip bus lock and desc->lock.
+	 */
 	mutex_lock(&desc->request_mutex);
+
+	/*
+	 * Acquire bus lock as the irq_request_resources() callback below
+	 * might rely on the serialization or the magic power management
+	 * functions which are abusing the irq_bus_lock() callback,
+	 */
+	chip_bus_lock(desc);
+
+	/* First installed action requests resources. */
 	if (!desc->action) {
 		ret = irq_request_resources(desc);
 		if (ret) {
 			pr_err("Failed to request resources for %s (irq %d) on irqchip %s\n",
 			       new->name, irq, desc->irq_data.chip->name);
-			goto out_mutex;
+			goto out_bus_unlock;
 		}
 	}
 
-	chip_bus_lock(desc);
-
 	/*
 	 * The following block of code has to be executed atomically
+	 * protected against a concurrent interrupt and any of the other
+	 * management calls which are not serialized via
+	 * desc->request_mutex or the optional bus lock.
 	 */
 	raw_spin_lock_irqsave(&desc->lock, flags);
 	old_ptr = &desc->action;
@@ -1286,10 +1311,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 			ret = __irq_set_trigger(desc,
 						new->flags & IRQF_TRIGGER_MASK);
 
-			if (ret) {
-				irq_release_resources(desc);
+			if (ret)
 				goto out_unlock;
-			}
 		}
 
 	desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
@@ -1385,12 +1408,10 @@ mismatch:
out_unlock:
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
-	chip_bus_sync_unlock(desc);
-
 	if (!desc->action)
 		irq_release_resources(desc);
-
-out_mutex:
+out_bus_unlock:
+	chip_bus_sync_unlock(desc);
 	mutex_unlock(&desc->request_mutex);
 
out_thread:
@@ -1472,6 +1493,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		WARN(1, "Trying to free already-free IRQ %d\n", irq);
 		raw_spin_unlock_irqrestore(&desc->lock, flags);
 		chip_bus_sync_unlock(desc);
+		mutex_unlock(&desc->request_mutex);
 		return NULL;
 	}
 
@@ -1498,6 +1520,20 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
#endif
 
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
+	/*
+	 * Drop bus_lock here so the changes which were done in the chip
+	 * callbacks above are synced out to the irq chips which hang
+	 * behind a slow bus (I2C, SPI) before calling synchronize_irq().
+	 *
+	 * Aside of that the bus_lock can also be taken from the threaded
+	 * handler in irq_finalize_oneshot() which results in a deadlock
+	 * because synchronize_irq() would wait forever for the thread to
+	 * complete, which is blocked on the bus lock.
+	 *
+	 * The still held desc->request_mutex() protects against a
+	 * concurrent request_irq() of this irq so the release of resources
+	 * and timing data is properly serialized.
+	 */
 	chip_bus_sync_unlock(desc);
 
 	unregister_handler_proc(irq, action);
@@ -1530,8 +1566,15 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		}
 	}
 
+	/* Last action releases resources */
 	if (!desc->action) {
+		/*
+		 * Reaquire bus lock as irq_release_resources() might
+		 * require it to deallocate resources over the slow bus.
+		 */
+		chip_bus_lock(desc);
 		irq_release_resources(desc);
+		chip_bus_sync_unlock(desc);
 		irq_remove_timings(desc);
 	}
 
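
The locking comments added above pin down one acquisition order for the request/free paths: desc->request_mutex first, then the optional chip bus lock, then desc->lock, with the bus lock dropped before synchronize_irq() and re-taken only to release resources. A small user-space analogy of that rule (the lock names are stand-ins, not kernel API): as long as every path nests the locks in the same outer-to-inner order and never performs the slow "bus" work under the innermost lock, the two paths cannot deadlock against each other.

#include <pthread.h>
#include <stdio.h>

/* Illustrative stand-ins for desc->request_mutex, the chip bus lock and desc->lock. */
static pthread_mutex_t request_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t bus_lock      = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t desc_lock     = PTHREAD_MUTEX_INITIALIZER;

static void setup_path(void)
{
	pthread_mutex_lock(&request_mutex);	/* serializes request vs. free       */
	pthread_mutex_lock(&bus_lock);		/* slow-bus (I2C/SPI) work goes here */
	pthread_mutex_lock(&desc_lock);		/* innermost: publish the action     */
	pthread_mutex_unlock(&desc_lock);
	pthread_mutex_unlock(&bus_lock);
	pthread_mutex_unlock(&request_mutex);
}

static void free_path(void)
{
	pthread_mutex_lock(&request_mutex);
	pthread_mutex_lock(&bus_lock);
	pthread_mutex_lock(&desc_lock);		/* unlink the action */
	pthread_mutex_unlock(&desc_lock);
	pthread_mutex_unlock(&bus_lock);	/* drop before waiting for handlers  */

	pthread_mutex_lock(&bus_lock);		/* re-take only to release resources */
	pthread_mutex_unlock(&bus_lock);
	pthread_mutex_unlock(&request_mutex);
}

int main(void)
{
	setup_path();
	free_path();
	puts("ordering held: request_mutex -> bus_lock -> desc_lock");
	return 0;
}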
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index cea1de0161f1..6bd9b58429cc 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -149,6 +149,8 @@ static void resume_irq(struct irq_desc *desc)
 
 	/* Pretend that it got disabled ! */
 	desc->depth++;
+	irq_state_set_disabled(desc);
+	irq_state_set_masked(desc);
resume:
 	desc->istate &= ~IRQS_SUSPENDED;
 	__enable_irq(desc);
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 78069895032a..649dc9d3951a 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -963,7 +963,6 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
 		return -EDEADLK;
 
 	raw_spin_lock(&task->pi_lock);
-	rt_mutex_adjust_prio(task);
 	waiter->task = task;
 	waiter->lock = lock;
 	waiter->prio = task->prio;
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 6e3ea4ac1bda..14d2dbf97c53 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -683,7 +683,7 @@ static u64 vtime_delta(struct vtime *vtime)
 {
 	unsigned long long clock;
 
-	clock = sched_clock_cpu(smp_processor_id());
+	clock = sched_clock();
 	if (clock < vtime->starttime)
 		return 0;
 
@@ -814,7 +814,7 @@ void arch_vtime_task_switch(struct task_struct *prev)
 
 	write_seqcount_begin(&vtime->seqcount);
 	vtime->state = VTIME_SYS;
-	vtime->starttime = sched_clock_cpu(smp_processor_id());
+	vtime->starttime = sched_clock();
 	write_seqcount_end(&vtime->seqcount);
 }
 
@@ -826,7 +826,7 @@ void vtime_init_idle(struct task_struct *t, int cpu)
 	local_irq_save(flags);
 	write_seqcount_begin(&vtime->seqcount);
 	vtime->state = VTIME_SYS;
-	vtime->starttime = sched_clock_cpu(cpu);
+	vtime->starttime = sched_clock();
 	write_seqcount_end(&vtime->seqcount);
 	local_irq_restore(flags);
 }
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a84299f44b5d..755bd3f1a1a9 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1392,17 +1392,19 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 	struct sched_dl_entity *pi_se = &p->dl;
 
 	/*
-	 * Use the scheduling parameters of the top pi-waiter
-	 * task if we have one and its (absolute) deadline is
-	 * smaller than our one... OTW we keep our runtime and
-	 * deadline.
+	 * Use the scheduling parameters of the top pi-waiter task if:
+	 * - we have a top pi-waiter which is a SCHED_DEADLINE task AND
+	 * - our dl_boosted is set (i.e. the pi-waiter's (absolute) deadline is
+	 *   smaller than our deadline OR we are a !SCHED_DEADLINE task getting
+	 *   boosted due to a SCHED_DEADLINE pi-waiter).
+	 * Otherwise we keep our runtime and deadline.
 	 */
-	if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
+	if (pi_task && dl_prio(pi_task->normal_prio) && p->dl.dl_boosted) {
 		pi_se = &pi_task->dl;
 	} else if (!dl_prio(p->normal_prio)) {
 		/*
 		 * Special case in which we have a !SCHED_DEADLINE task
-		 * that is going to be deboosted, but exceedes its
+		 * that is going to be deboosted, but exceeds its
 		 * runtime while doing so. No point in replenishing
 		 * it, as it's going to return back to its original
 		 * scheduling class after this.
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 53f6b6401cf0..02004ae91860 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -113,7 +113,7 @@ static int ftrace_disabled __read_mostly;
 
 static DEFINE_MUTEX(ftrace_lock);
 
-static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
+static struct ftrace_ops __rcu *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 static struct ftrace_ops global_ops;
 
@@ -169,8 +169,11 @@ int ftrace_nr_registered_ops(void)
 
 	mutex_lock(&ftrace_lock);
 
-	for (ops = ftrace_ops_list;
-	     ops != &ftrace_list_end; ops = ops->next)
+	for (ops = rcu_dereference_protected(ftrace_ops_list,
+					     lockdep_is_held(&ftrace_lock));
+	     ops != &ftrace_list_end;
+	     ops = rcu_dereference_protected(ops->next,
+					     lockdep_is_held(&ftrace_lock)))
 		cnt++;
 
 	mutex_unlock(&ftrace_lock);
@@ -275,10 +278,11 @@ static void update_ftrace_function(void)
 	 * If there's only one ftrace_ops registered, the ftrace_ops_list
 	 * will point to the ops we want.
 	 */
-	set_function_trace_op = ftrace_ops_list;
+	set_function_trace_op = rcu_dereference_protected(ftrace_ops_list,
+						lockdep_is_held(&ftrace_lock));
 
 	/* If there's no ftrace_ops registered, just call the stub function */
-	if (ftrace_ops_list == &ftrace_list_end) {
+	if (set_function_trace_op == &ftrace_list_end) {
 		func = ftrace_stub;
 
 		/*
@@ -286,7 +290,8 @@ static void update_ftrace_function(void)
 	 * recursion safe and not dynamic and the arch supports passing ops,
 	 * then have the mcount trampoline call the function directly.
 	 */
-	} else if (ftrace_ops_list->next == &ftrace_list_end) {
+	} else if (rcu_dereference_protected(ftrace_ops_list->next,
+			lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
 		func = ftrace_ops_get_list_func(ftrace_ops_list);
 
 	} else {
@@ -348,9 +353,11 @@ int using_ftrace_ops_list_func(void)
 	return ftrace_trace_function == ftrace_ops_list_func;
 }
 
-static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
+static void add_ftrace_ops(struct ftrace_ops __rcu **list,
+			   struct ftrace_ops *ops)
 {
-	ops->next = *list;
+	rcu_assign_pointer(ops->next, *list);
+
 	/*
 	 * We are entering ops into the list but another
 	 * CPU might be walking that list. We need to make sure
@@ -360,7 +367,8 @@ static void add_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
 	rcu_assign_pointer(*list, ops);
 }
 
-static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
+static int remove_ftrace_ops(struct ftrace_ops __rcu **list,
+			     struct ftrace_ops *ops)
 {
 	struct ftrace_ops **p;
 
@@ -368,7 +376,10 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
 	 * If we are removing the last function, then simply point
 	 * to the ftrace_stub.
 	 */
-	if (*list == ops && ops->next == &ftrace_list_end) {
+	if (rcu_dereference_protected(*list,
+			lockdep_is_held(&ftrace_lock)) == ops &&
+	    rcu_dereference_protected(ops->next,
+			lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
 		*list = &ftrace_list_end;
 		return 0;
 	}
@@ -1569,8 +1580,8 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
 		return 0;
#endif
 
-	hash.filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash);
-	hash.notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash);
+	rcu_assign_pointer(hash.filter_hash, ops->func_hash->filter_hash);
+	rcu_assign_pointer(hash.notrace_hash, ops->func_hash->notrace_hash);
 
 	if (hash_contains_ip(ip, &hash))
 		ret = 1;
@@ -2840,7 +2851,8 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 	 * If there's no more ops registered with ftrace, run a
 	 * sanity check to make sure all rec flags are cleared.
 	 */
-	if (ftrace_ops_list == &ftrace_list_end) {
+	if (rcu_dereference_protected(ftrace_ops_list,
+			lockdep_is_held(&ftrace_lock)) == &ftrace_list_end) {
 		struct ftrace_page *pg;
 		struct dyn_ftrace *rec;
 
@@ -6453,7 +6465,8 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 	if (ftrace_enabled) {
 
 		/* we are starting ftrace again */
-		if (ftrace_ops_list != &ftrace_list_end)
+		if (rcu_dereference_protected(ftrace_ops_list,
+			lockdep_is_held(&ftrace_lock)) != &ftrace_list_end)
 			update_ftrace_function();
 
 		ftrace_startup_sysctl();
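
The ftrace hunks annotate ftrace_ops_list and ops->next as __rcu and replace plain loads with rcu_dereference_protected(..., lockdep_is_held(&ftrace_lock)), so sparse and lockdep can check that every access is either a properly published read or made under the lock. A rough user-space analogy with C11 atomics (this is not the kernel RCU API): rcu_assign_pointer() plays the role of a release store that publishes a fully initialized node, and a reader's dereference of the list head plays the role of an acquire load.

#include <stdatomic.h>
#include <stdio.h>

struct ops {
	const char *name;
	struct ops *next;	/* written only by the single updater */
};

/* List head; lockless readers may load it while an updater publishes. */
static _Atomic(struct ops *) ops_list;

static void add_ops(struct ops *op)	/* updater side, analogous to add_ftrace_ops() */
{
	op->next = atomic_load_explicit(&ops_list, memory_order_relaxed);
	/* Release store: *op is fully visible before the pointer is -- the
	 * rcu_assign_pointer() idea. */
	atomic_store_explicit(&ops_list, op, memory_order_release);
}

static void walk_ops(void)		/* reader side, no lock held */
{
	/* Acquire load pairs with the release store above -- the
	 * rcu_dereference() idea, minus the grace-period machinery. */
	struct ops *op = atomic_load_explicit(&ops_list, memory_order_acquire);

	for (; op; op = op->next)
		printf("%s\n", op->name);
}

int main(void)
{
	static struct ops a = { .name = "ops_a" }, b = { .name = "ops_b" };

	add_ops(&a);
	add_ops(&b);
	walk_ops();
	return 0;
}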
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 4ae268e687fe..529cc50d7243 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1136,12 +1136,12 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
 	for (i = 0; i < nr_pages; i++) {
 		struct page *page;
 		/*
-		 * __GFP_NORETRY flag makes sure that the allocation fails
-		 * gracefully without invoking oom-killer and the system is
-		 * not destabilized.
+		 * __GFP_RETRY_MAYFAIL flag makes sure that the allocation fails
+		 * gracefully without invoking oom-killer and the system is not
+		 * destabilized.
 		 */
 		bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()),
-				    GFP_KERNEL | __GFP_NORETRY,
+				    GFP_KERNEL | __GFP_RETRY_MAYFAIL,
 				    cpu_to_node(cpu));
 		if (!bpage)
 			goto free_pages;
@@ -1149,7 +1149,7 @@ static int __rb_allocate_pages(long nr_pages, struct list_head *pages, int cpu)
 		list_add(&bpage->list, pages);
 
 		page = alloc_pages_node(cpu_to_node(cpu),
-					GFP_KERNEL | __GFP_NORETRY, 0);
+					GFP_KERNEL | __GFP_RETRY_MAYFAIL, 0);
 		if (!page)
 			goto free_pages;
 		bpage->page = page_address(page);
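
On the flag change above: __GFP_NORETRY gives up quickly and can therefore fail spuriously under fragmentation, while __GFP_RETRY_MAYFAIL retries reclaim harder but still returns NULL rather than invoking the OOM killer, which suits a resize path that can simply keep its old buffer. A user-space sketch of the same fail-gracefully-and-unwind shape used by __rb_allocate_pages() (illustrative only, not kernel code):

#include <stdio.h>
#include <stdlib.h>

/* Allocate nr buffers; on any failure free what was allocated so far and
 * report the error, leaving the caller's existing state untouched. */
static int allocate_buffers(void **bufs, long nr, size_t size)
{
	long i;

	for (i = 0; i < nr; i++) {
		bufs[i] = calloc(1, size);
		if (!bufs[i])
			goto free_bufs;
	}
	return 0;

free_bufs:
	while (--i >= 0)
		free(bufs[i]);
	return -1;
}

int main(void)
{
	void *bufs[16];
	long i;

	if (allocate_buffers(bufs, 16, 4096) == 0) {
		puts("allocated; caller would now swap in the new buffers");
		for (i = 0; i < 16; i++)
			free(bufs[i]);
	}
	return 0;
}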
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 2d0ffcc49dba..42b9355033d4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -7774,6 +7774,7 @@ static int instance_rmdir(const char *name)
 	}
 	kfree(tr->topts);
 
+	free_cpumask_var(tr->tracing_cpumask);
 	kfree(tr->name);
 	kfree(tr);
 
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6ade1c55cc3a..490ba229931d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1210,9 +1210,9 @@ struct ftrace_event_field {
struct event_filter {
 	int			n_preds;	/* Number assigned */
 	int			a_preds;	/* allocated */
-	struct filter_pred	*preds;
-	struct filter_pred	*root;
+	struct filter_pred __rcu	*preds;
+	struct filter_pred __rcu	*root;
 	char			*filter_string;
};
 
struct event_subsystem {