author		Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2013-10-04 17:33:34 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2013-12-03 13:10:18 -0500
commit		96d3fd0d315a949e30adc80f086031c5cdf070d1
tree		0fe7013d59b4d69a91bf031c0a53e8d279413e4a
parent		78e4bc34e5d966cfd95f1238565afc399d56225c
rcu: Break call_rcu() deadlock involving scheduler and perf
Dave Jones got the following lockdep splat:
> ======================================================
> [ INFO: possible circular locking dependency detected ]
> 3.12.0-rc3+ #92 Not tainted
> -------------------------------------------------------
> trinity-child2/15191 is trying to acquire lock:
> (&rdp->nocb_wq){......}, at: [<ffffffff8108ff43>] __wake_up+0x23/0x50
>
> but task is already holding lock:
> (&ctx->lock){-.-...}, at: [<ffffffff81154c19>] perf_event_exit_task+0x109/0x230
>
> which lock already depends on the new lock.
>
>
> the existing dependency chain (in reverse order) is:
>
> -> #3 (&ctx->lock){-.-...}:
> [<ffffffff810cc243>] lock_acquire+0x93/0x200
> [<ffffffff81733f90>] _raw_spin_lock+0x40/0x80
> [<ffffffff811500ff>] __perf_event_task_sched_out+0x2df/0x5e0
> [<ffffffff81091b83>] perf_event_task_sched_out+0x93/0xa0
> [<ffffffff81732052>] __schedule+0x1d2/0xa20
> [<ffffffff81732f30>] preempt_schedule_irq+0x50/0xb0
> [<ffffffff817352b6>] retint_kernel+0x26/0x30
> [<ffffffff813eed04>] tty_flip_buffer_push+0x34/0x50
> [<ffffffff813f0504>] pty_write+0x54/0x60
> [<ffffffff813e900d>] n_tty_write+0x32d/0x4e0
> [<ffffffff813e5838>] tty_write+0x158/0x2d0
> [<ffffffff811c4850>] vfs_write+0xc0/0x1f0
> [<ffffffff811c52cc>] SyS_write+0x4c/0xa0
> [<ffffffff8173d4e4>] tracesys+0xdd/0xe2
>
> -> #2 (&rq->lock){-.-.-.}:
> [<ffffffff810cc243>] lock_acquire+0x93/0x200
> [<ffffffff81733f90>] _raw_spin_lock+0x40/0x80
> [<ffffffff810980b2>] wake_up_new_task+0xc2/0x2e0
> [<ffffffff81054336>] do_fork+0x126/0x460
> [<ffffffff81054696>] kernel_thread+0x26/0x30
> [<ffffffff8171ff93>] rest_init+0x23/0x140
> [<ffffffff81ee1e4b>] start_kernel+0x3f6/0x403
> [<ffffffff81ee1571>] x86_64_start_reservations+0x2a/0x2c
> [<ffffffff81ee1664>] x86_64_start_kernel+0xf1/0xf4
>
> -> #1 (&p->pi_lock){-.-.-.}:
> [<ffffffff810cc243>] lock_acquire+0x93/0x200
> [<ffffffff8173419b>] _raw_spin_lock_irqsave+0x4b/0x90
> [<ffffffff810979d1>] try_to_wake_up+0x31/0x350
> [<ffffffff81097d62>] default_wake_function+0x12/0x20
> [<ffffffff81084af8>] autoremove_wake_function+0x18/0x40
> [<ffffffff8108ea38>] __wake_up_common+0x58/0x90
> [<ffffffff8108ff59>] __wake_up+0x39/0x50
> [<ffffffff8110d4f8>] __call_rcu_nocb_enqueue+0xa8/0xc0
> [<ffffffff81111450>] __call_rcu+0x140/0x820
> [<ffffffff81111b8d>] call_rcu+0x1d/0x20
> [<ffffffff81093697>] cpu_attach_domain+0x287/0x360
> [<ffffffff81099d7e>] build_sched_domains+0xe5e/0x10a0
> [<ffffffff81efa7fc>] sched_init_smp+0x3b7/0x47a
> [<ffffffff81ee1f4e>] kernel_init_freeable+0xf6/0x202
> [<ffffffff817200be>] kernel_init+0xe/0x190
> [<ffffffff8173d22c>] ret_from_fork+0x7c/0xb0
>
> -> #0 (&rdp->nocb_wq){......}:
> [<ffffffff810cb7ca>] __lock_acquire+0x191a/0x1be0
> [<ffffffff810cc243>] lock_acquire+0x93/0x200
> [<ffffffff8173419b>] _raw_spin_lock_irqsave+0x4b/0x90
> [<ffffffff8108ff43>] __wake_up+0x23/0x50
> [<ffffffff8110d4f8>] __call_rcu_nocb_enqueue+0xa8/0xc0
> [<ffffffff81111450>] __call_rcu+0x140/0x820
> [<ffffffff81111bb0>] kfree_call_rcu+0x20/0x30
> [<ffffffff81149abf>] put_ctx+0x4f/0x70
> [<ffffffff81154c3e>] perf_event_exit_task+0x12e/0x230
> [<ffffffff81056b8d>] do_exit+0x30d/0xcc0
> [<ffffffff8105893c>] do_group_exit+0x4c/0xc0
> [<ffffffff810589c4>] SyS_exit_group+0x14/0x20
> [<ffffffff8173d4e4>] tracesys+0xdd/0xe2
>
> other info that might help us debug this:
>
> Chain exists of:
> &rdp->nocb_wq --> &rq->lock --> &ctx->lock
>
> Possible unsafe locking scenario:
>
>        CPU0                    CPU1
>        ----                    ----
>   lock(&ctx->lock);
>                                lock(&rq->lock);
>                                lock(&ctx->lock);
>   lock(&rdp->nocb_wq);
>
> *** DEADLOCK ***
>
> 1 lock held by trinity-child2/15191:
> #0: (&ctx->lock){-.-...}, at: [<ffffffff81154c19>] perf_event_exit_task+0x109/0x230
>
> stack backtrace:
> CPU: 2 PID: 15191 Comm: trinity-child2 Not tainted 3.12.0-rc3+ #92
> ffffffff82565b70 ffff880070c2dbf8 ffffffff8172a363 ffffffff824edf40
> ffff880070c2dc38 ffffffff81726741 ffff880070c2dc90 ffff88022383b1c0
> ffff88022383aac0 0000000000000000 ffff88022383b188 ffff88022383b1c0
> Call Trace:
> [<ffffffff8172a363>] dump_stack+0x4e/0x82
> [<ffffffff81726741>] print_circular_bug+0x200/0x20f
> [<ffffffff810cb7ca>] __lock_acquire+0x191a/0x1be0
> [<ffffffff810c6439>] ? get_lock_stats+0x19/0x60
> [<ffffffff8100b2f4>] ? native_sched_clock+0x24/0x80
> [<ffffffff810cc243>] lock_acquire+0x93/0x200
> [<ffffffff8108ff43>] ? __wake_up+0x23/0x50
> [<ffffffff8173419b>] _raw_spin_lock_irqsave+0x4b/0x90
> [<ffffffff8108ff43>] ? __wake_up+0x23/0x50
> [<ffffffff8108ff43>] __wake_up+0x23/0x50
> [<ffffffff8110d4f8>] __call_rcu_nocb_enqueue+0xa8/0xc0
> [<ffffffff81111450>] __call_rcu+0x140/0x820
> [<ffffffff8109bc8f>] ? local_clock+0x3f/0x50
> [<ffffffff81111bb0>] kfree_call_rcu+0x20/0x30
> [<ffffffff81149abf>] put_ctx+0x4f/0x70
> [<ffffffff81154c3e>] perf_event_exit_task+0x12e/0x230
> [<ffffffff81056b8d>] do_exit+0x30d/0xcc0
> [<ffffffff810c9af5>] ? trace_hardirqs_on_caller+0x115/0x1e0
> [<ffffffff810c9bcd>] ? trace_hardirqs_on+0xd/0x10
> [<ffffffff8105893c>] do_group_exit+0x4c/0xc0
> [<ffffffff810589c4>] SyS_exit_group+0x14/0x20
> [<ffffffff8173d4e4>] tracesys+0xdd/0xe2
The underlying problem is that perf is invoking call_rcu() with the
scheduler locks held, but in NOCB mode, call_rcu() will with high
probability invoke the scheduler -- which just might want to use its
locks. The reason that call_rcu() needs to invoke the scheduler is
to wake up the corresponding rcuo callback-offload kthread, which
does the job of starting up a grace period and invoking the callbacks
afterwards.
One solution (championed on a related problem by Lai Jiangshan) is to
simply defer the wakeup to some point where scheduler locks are no longer
held. Since we don't want to unnecessarily incur the cost of such
deferral, the task before us is threefold:
1. Determine when it is likely that a relevant scheduler lock is held.
2. Defer the wakeup in such cases.
3. Ensure that all deferred wakeups eventually happen, preferably
sooner rather than later.
We use irqs_disabled_flags() as a proxy for relevant scheduler locks
being held. This works because the relevant locks are always acquired
with interrupts disabled. We may defer more often than needed, but that
is at least safe.
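
In code, the enqueue-side decision reduces to roughly the following (a condensed
sketch of the __call_rcu_nocb_enqueue() change in the tree_plugin.h hunk below,
with tracing and statistics omitted):

	if (old_rhpp == &rdp->nocb_head) {	/* Queue was previously empty. */
		if (!irqs_disabled_flags(flags))
			wake_up(&rdp->nocb_wq);		/* Safe to wake rcuo directly. */
		else
			rdp->nocb_defer_wakeup = true;	/* Might deadlock, so defer. */
	}

Here "flags" is the value saved by local_irq_save() in __call_rcu(), which is why
that value is now passed down to the no-CBs enqueue path.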
The wakeup deferral is tracked via a new field in the per-CPU and
per-RCU-flavor rcu_data structure, namely ->nocb_defer_wakeup.
This flag is checked by the RCU core processing. The __rcu_pending()
function now checks this flag, which causes rcu_check_callbacks()
to initiate RCU core processing at each scheduling-clock interrupt
where this flag is set. Of course this is not sufficient because
scheduling-clock interrupts are often turned off (the things we used to
be able to count on!). So the flags are also checked on entry to any
state that RCU considers to be idle, which includes both NO_HZ_IDLE idle
state and NO_HZ_FULL user-mode-execution state.
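
The check-and-flush side is a pair of small helpers (again condensed from the
tree_plugin.h hunk below, tracing omitted), with do_nocb_deferred_wakeup()
invoked from both __rcu_process_callbacks() and rcu_eqs_enter_common():

	static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
	{
		return ACCESS_ONCE(rdp->nocb_defer_wakeup);
	}

	static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
	{
		if (!rcu_nocb_need_deferred_wakeup(rdp))
			return;
		ACCESS_ONCE(rdp->nocb_defer_wakeup) = false;
		wake_up(&rdp->nocb_wq);	/* Flush the deferred wakeup. */
	}

In addition, __rcu_pending() returns 1 when rcu_nocb_need_deferred_wakeup() says
a wakeup is pending, so that rcu_check_callbacks() initiates RCU core processing
on the next scheduling-clock interrupt.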
This approach should allow call_rcu() to be invoked regardless of what
locks you might be holding, the key word being "should".
Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
-rw-r--r--	Documentation/RCU/trace.txt	20
-rw-r--r--	kernel/rcu/tree.c		24
-rw-r--r--	kernel/rcu/tree.h		9
-rw-r--r--	kernel/rcu/tree_plugin.h	55
-rw-r--r--	kernel/rcu/tree_trace.c		3
5 files changed, 86 insertions(+), 25 deletions(-)
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index f3778f8952da..b8c3c813ea57 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -396,14 +396,14 @@ o Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node
 
 The output of "cat rcu/rcu_sched/rcu_pending" looks as follows:
 
-  0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903
-  1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113
-  2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889
-  3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469
-  4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042
-  5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422
-  6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699
-  7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147
+  0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 ndw=0
+  1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 ndw=0
+  2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 ndw=0
+  3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 ndw=0
+  4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 ndw=0
+  5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 ndw=0
+  6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 ndw=0
+  7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 ndw=0
 
 The fields are as follows:
 
@@ -432,6 +432,10 @@ o "gpc" is the number of times that an old grace period had
 o	"gps" is the number of times that a new grace period had started,
 	but this CPU was not yet aware of it.
 
+o	"ndw" is the number of times that a wakeup of an rcuo
+	callback-offload kthread had to be deferred in order to avoid
+	deadlock.
+
 o	"nn" is the number of times that this CPU needed nothing.
 
 
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index abef9c358d47..264f0284c0bd 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -369,6 +369,9 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 				bool user)
 {
+	struct rcu_state *rsp;
+	struct rcu_data *rdp;
+
 	trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
 	if (!user && !is_idle_task(current)) {
 		struct task_struct *idle __maybe_unused =
@@ -380,6 +383,10 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
 	}
+	for_each_rcu_flavor(rsp) {
+		rdp = this_cpu_ptr(rsp->rda);
+		do_nocb_deferred_wakeup(rdp);
+	}
 	rcu_prepare_for_idle(smp_processor_id());
 	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
 	smp_mb__before_atomic_inc();  /* See above. */
@@ -1928,13 +1935,13 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
  * Adopt the RCU callbacks from the specified rcu_state structure's
  * orphanage.  The caller must hold the ->orphan_lock.
  */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 {
 	int i;
 	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
 	/* No-CBs CPUs are handled specially. */
-	if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
+	if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
 		return;
 
 	/* Do the accounting first. */
@@ -2013,7 +2020,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
 	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
 	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
-	rcu_adopt_orphan_cbs(rsp);
+	rcu_adopt_orphan_cbs(rsp, flags);
 
 	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
 	mask = rdp->grpmask;	/* rnp->grplo is constant. */
@@ -2330,6 +2337,9 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	/* If there are callbacks ready, invoke them. */
 	if (cpu_has_callbacks_ready_to_invoke(rdp))
 		invoke_rcu_callbacks(rsp, rdp);
+
+	/* Do any needed deferred wakeups of rcuo kthreads. */
+	do_nocb_deferred_wakeup(rdp);
 }
 
 /*
@@ -2464,7 +2474,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 
 		if (cpu != -1)
 			rdp = per_cpu_ptr(rsp->rda, cpu);
-		offline = !__call_rcu_nocb(rdp, head, lazy);
+		offline = !__call_rcu_nocb(rdp, head, lazy, flags);
 		WARN_ON_ONCE(offline);
 		/* _call_rcu() is illegal on offline CPU; leak the callback. */
 		local_irq_restore(flags);
@@ -2817,6 +2827,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 		return 1;
 	}
 
+	/* Does this CPU need a deferred NOCB wakeup? */
+	if (rcu_nocb_need_deferred_wakeup(rdp)) {
+		rdp->n_rp_nocb_defer_wakeup++;
+		return 1;
+	}
+
 	/* nothing to do */
 	rdp->n_rp_need_nothing++;
 	return 0;
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 8e34d8674a4e..a87adfc2916b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -317,6 +317,7 @@ struct rcu_data {
 	unsigned long n_rp_cpu_needs_gp;
 	unsigned long n_rp_gp_completed;
 	unsigned long n_rp_gp_started;
+	unsigned long n_rp_nocb_defer_wakeup;
 	unsigned long n_rp_need_nothing;
 
 	/* 6) _rcu_barrier() and OOM callbacks. */
@@ -335,6 +336,7 @@ struct rcu_data {
 	int nocb_p_count_lazy;		/*  (approximate). */
 	wait_queue_head_t nocb_wq;	/* For nocb kthreads to sleep on. */
 	struct task_struct *nocb_kthread;
+	bool nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
 	/* 8) RCU CPU stall data. */
@@ -550,9 +552,12 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-			    bool lazy);
+			    bool lazy, unsigned long flags);
 static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-				      struct rcu_data *rdp);
+				      struct rcu_data *rdp,
+				      unsigned long flags);
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
 static void rcu_kick_nohz_cpu(int cpu);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index b023e5407111..752ffaa0d681 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2104,7 +2104,8 @@ bool rcu_is_nocb_cpu(int cpu)
 static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 				    struct rcu_head *rhp,
 				    struct rcu_head **rhtp,
-				    int rhcount, int rhcount_lazy)
+				    int rhcount, int rhcount_lazy,
+				    unsigned long flags)
 {
 	int len;
 	struct rcu_head **old_rhpp;
@@ -2125,9 +2126,16 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 	}
 	len = atomic_long_read(&rdp->nocb_q_count);
 	if (old_rhpp == &rdp->nocb_head) {
-		wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
+		if (!irqs_disabled_flags(flags)) {
+			wake_up(&rdp->nocb_wq); /* ... if queue was empty ... */
+			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+					    TPS("WakeEmpty"));
+		} else {
+			rdp->nocb_defer_wakeup = true;
+			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+					    TPS("WakeEmptyIsDeferred"));
+		}
 		rdp->qlen_last_fqs_check = 0;
-		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty"));
 	} else if (len > rdp->qlen_last_fqs_check + qhimark) {
 		wake_up_process(t); /* ... or if many callbacks queued. */
 		rdp->qlen_last_fqs_check = LONG_MAX / 2;
@@ -2148,12 +2156,12 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
  * "rcuo" kthread can find it.
  */
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-			    bool lazy)
+			    bool lazy, unsigned long flags)
 {
 
 	if (!rcu_is_nocb_cpu(rdp->cpu))
 		return 0;
-	__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
+	__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
 	if (__is_kfree_rcu_offset((unsigned long)rhp->func))
 		trace_rcu_kfree_callback(rdp->rsp->name, rhp,
 					 (unsigned long)rhp->func,
@@ -2171,7 +2179,8 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
  * not a no-CBs CPU.
  */
 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-						     struct rcu_data *rdp)
+						     struct rcu_data *rdp,
+						     unsigned long flags)
 {
 	long ql = rsp->qlen;
 	long qll = rsp->qlen_lazy;
@@ -2185,14 +2194,14 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 	/* First, enqueue the donelist, if any.  This preserves CB ordering. */
 	if (rsp->orphan_donelist != NULL) {
 		__call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
-					rsp->orphan_donetail, ql, qll);
+					rsp->orphan_donetail, ql, qll, flags);
 		ql = qll = 0;
 		rsp->orphan_donelist = NULL;
 		rsp->orphan_donetail = &rsp->orphan_donelist;
 	}
 	if (rsp->orphan_nxtlist != NULL) {
 		__call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
-					rsp->orphan_nxttail, ql, qll);
+					rsp->orphan_nxttail, ql, qll, flags);
 		ql = qll = 0;
 		rsp->orphan_nxtlist = NULL;
 		rsp->orphan_nxttail = &rsp->orphan_nxtlist;
@@ -2314,6 +2323,22 @@ static int rcu_nocb_kthread(void *arg)
 	return 0;
 }
 
+/* Is a deferred wakeup of rcu_nocb_kthread() required? */
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+{
+	return ACCESS_ONCE(rdp->nocb_defer_wakeup);
+}
+
+/* Do a deferred wakeup of rcu_nocb_kthread(). */
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+{
+	if (!rcu_nocb_need_deferred_wakeup(rdp))
+		return;
+	ACCESS_ONCE(rdp->nocb_defer_wakeup) = false;
+	wake_up(&rdp->nocb_wq);
+	trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty"));
+}
+
 /* Initialize per-rcu_data variables for no-CBs CPUs. */
 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
@@ -2369,13 +2394,14 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
 }
 
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-			    bool lazy)
+			    bool lazy, unsigned long flags)
 {
 	return 0;
 }
 
 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-						     struct rcu_data *rdp)
+						     struct rcu_data *rdp,
+						     unsigned long flags)
 {
 	return 0;
 }
@@ -2384,6 +2410,15 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
 }
 
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+{
+	return false;
+}
+
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+{
+}
+
 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
 {
 }
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 3596797b7e46..4def475336d4 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -364,9 +364,10 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->n_rp_report_qs,
 		   rdp->n_rp_cb_ready,
 		   rdp->n_rp_cpu_needs_gp);
-	seq_printf(m, "gpc=%ld gps=%ld nn=%ld\n",
+	seq_printf(m, "gpc=%ld gps=%ld nn=%ld ndw%ld\n",
 		   rdp->n_rp_gp_completed,
 		   rdp->n_rp_gp_started,
+		   rdp->n_rp_nocb_defer_wakeup,
 		   rdp->n_rp_need_nothing);
 }
 