author		Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2013-10-04 17:33:34 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2013-12-03 13:10:18 -0500
commit		96d3fd0d315a949e30adc80f086031c5cdf070d1 (patch)
tree		0fe7013d59b4d69a91bf031c0a53e8d279413e4a
parent		78e4bc34e5d966cfd95f1238565afc399d56225c (diff)
rcu: Break call_rcu() deadlock involving scheduler and perf
Dave Jones got the following lockdep splat:

>  ======================================================
>  [ INFO: possible circular locking dependency detected ]
>  3.12.0-rc3+ #92 Not tainted
>  -------------------------------------------------------
>  trinity-child2/15191 is trying to acquire lock:
>   (&rdp->nocb_wq){......}, at: [<ffffffff8108ff43>] __wake_up+0x23/0x50
>
>  but task is already holding lock:
>   (&ctx->lock){-.-...}, at: [<ffffffff81154c19>] perf_event_exit_task+0x109/0x230
>
>  which lock already depends on the new lock.
>
>
>  the existing dependency chain (in reverse order) is:
>
>  -> #3 (&ctx->lock){-.-...}:
>         [<ffffffff810cc243>] lock_acquire+0x93/0x200
>         [<ffffffff81733f90>] _raw_spin_lock+0x40/0x80
>         [<ffffffff811500ff>] __perf_event_task_sched_out+0x2df/0x5e0
>         [<ffffffff81091b83>] perf_event_task_sched_out+0x93/0xa0
>         [<ffffffff81732052>] __schedule+0x1d2/0xa20
>         [<ffffffff81732f30>] preempt_schedule_irq+0x50/0xb0
>         [<ffffffff817352b6>] retint_kernel+0x26/0x30
>         [<ffffffff813eed04>] tty_flip_buffer_push+0x34/0x50
>         [<ffffffff813f0504>] pty_write+0x54/0x60
>         [<ffffffff813e900d>] n_tty_write+0x32d/0x4e0
>         [<ffffffff813e5838>] tty_write+0x158/0x2d0
>         [<ffffffff811c4850>] vfs_write+0xc0/0x1f0
>         [<ffffffff811c52cc>] SyS_write+0x4c/0xa0
>         [<ffffffff8173d4e4>] tracesys+0xdd/0xe2
>
>  -> #2 (&rq->lock){-.-.-.}:
>         [<ffffffff810cc243>] lock_acquire+0x93/0x200
>         [<ffffffff81733f90>] _raw_spin_lock+0x40/0x80
>         [<ffffffff810980b2>] wake_up_new_task+0xc2/0x2e0
>         [<ffffffff81054336>] do_fork+0x126/0x460
>         [<ffffffff81054696>] kernel_thread+0x26/0x30
>         [<ffffffff8171ff93>] rest_init+0x23/0x140
>         [<ffffffff81ee1e4b>] start_kernel+0x3f6/0x403
>         [<ffffffff81ee1571>] x86_64_start_reservations+0x2a/0x2c
>         [<ffffffff81ee1664>] x86_64_start_kernel+0xf1/0xf4
>
>  -> #1 (&p->pi_lock){-.-.-.}:
>         [<ffffffff810cc243>] lock_acquire+0x93/0x200
>         [<ffffffff8173419b>] _raw_spin_lock_irqsave+0x4b/0x90
>         [<ffffffff810979d1>] try_to_wake_up+0x31/0x350
>         [<ffffffff81097d62>] default_wake_function+0x12/0x20
>         [<ffffffff81084af8>] autoremove_wake_function+0x18/0x40
>         [<ffffffff8108ea38>] __wake_up_common+0x58/0x90
>         [<ffffffff8108ff59>] __wake_up+0x39/0x50
>         [<ffffffff8110d4f8>] __call_rcu_nocb_enqueue+0xa8/0xc0
>         [<ffffffff81111450>] __call_rcu+0x140/0x820
>         [<ffffffff81111b8d>] call_rcu+0x1d/0x20
>         [<ffffffff81093697>] cpu_attach_domain+0x287/0x360
>         [<ffffffff81099d7e>] build_sched_domains+0xe5e/0x10a0
>         [<ffffffff81efa7fc>] sched_init_smp+0x3b7/0x47a
>         [<ffffffff81ee1f4e>] kernel_init_freeable+0xf6/0x202
>         [<ffffffff817200be>] kernel_init+0xe/0x190
>         [<ffffffff8173d22c>] ret_from_fork+0x7c/0xb0
>
>  -> #0 (&rdp->nocb_wq){......}:
>         [<ffffffff810cb7ca>] __lock_acquire+0x191a/0x1be0
>         [<ffffffff810cc243>] lock_acquire+0x93/0x200
>         [<ffffffff8173419b>] _raw_spin_lock_irqsave+0x4b/0x90
>         [<ffffffff8108ff43>] __wake_up+0x23/0x50
>         [<ffffffff8110d4f8>] __call_rcu_nocb_enqueue+0xa8/0xc0
>         [<ffffffff81111450>] __call_rcu+0x140/0x820
>         [<ffffffff81111bb0>] kfree_call_rcu+0x20/0x30
>         [<ffffffff81149abf>] put_ctx+0x4f/0x70
>         [<ffffffff81154c3e>] perf_event_exit_task+0x12e/0x230
>         [<ffffffff81056b8d>] do_exit+0x30d/0xcc0
>         [<ffffffff8105893c>] do_group_exit+0x4c/0xc0
>         [<ffffffff810589c4>] SyS_exit_group+0x14/0x20
>         [<ffffffff8173d4e4>] tracesys+0xdd/0xe2
>
>  other info that might help us debug this:
>
>  Chain exists of:
>    &rdp->nocb_wq --> &rq->lock --> &ctx->lock
>
>  Possible unsafe locking scenario:
>
>         CPU0                    CPU1
>         ----                    ----
>    lock(&ctx->lock);
>                                 lock(&rq->lock);
>                                 lock(&ctx->lock);
>    lock(&rdp->nocb_wq);
>
>   *** DEADLOCK ***
>
>  1 lock held by trinity-child2/15191:
>   #0:  (&ctx->lock){-.-...}, at: [<ffffffff81154c19>] perf_event_exit_task+0x109/0x230
>
>  stack backtrace:
>  CPU: 2 PID: 15191 Comm: trinity-child2 Not tainted 3.12.0-rc3+ #92
>   ffffffff82565b70 ffff880070c2dbf8 ffffffff8172a363 ffffffff824edf40
>   ffff880070c2dc38 ffffffff81726741 ffff880070c2dc90 ffff88022383b1c0
>   ffff88022383aac0 0000000000000000 ffff88022383b188 ffff88022383b1c0
>  Call Trace:
>   [<ffffffff8172a363>] dump_stack+0x4e/0x82
>   [<ffffffff81726741>] print_circular_bug+0x200/0x20f
>   [<ffffffff810cb7ca>] __lock_acquire+0x191a/0x1be0
>   [<ffffffff810c6439>] ? get_lock_stats+0x19/0x60
>   [<ffffffff8100b2f4>] ? native_sched_clock+0x24/0x80
>   [<ffffffff810cc243>] lock_acquire+0x93/0x200
>   [<ffffffff8108ff43>] ? __wake_up+0x23/0x50
>   [<ffffffff8173419b>] _raw_spin_lock_irqsave+0x4b/0x90
>   [<ffffffff8108ff43>] ? __wake_up+0x23/0x50
>   [<ffffffff8108ff43>] __wake_up+0x23/0x50
>   [<ffffffff8110d4f8>] __call_rcu_nocb_enqueue+0xa8/0xc0
>   [<ffffffff81111450>] __call_rcu+0x140/0x820
>   [<ffffffff8109bc8f>] ? local_clock+0x3f/0x50
>   [<ffffffff81111bb0>] kfree_call_rcu+0x20/0x30
>   [<ffffffff81149abf>] put_ctx+0x4f/0x70
>   [<ffffffff81154c3e>] perf_event_exit_task+0x12e/0x230
>   [<ffffffff81056b8d>] do_exit+0x30d/0xcc0
>   [<ffffffff810c9af5>] ? trace_hardirqs_on_caller+0x115/0x1e0
>   [<ffffffff810c9bcd>] ? trace_hardirqs_on+0xd/0x10
>   [<ffffffff8105893c>] do_group_exit+0x4c/0xc0
>   [<ffffffff810589c4>] SyS_exit_group+0x14/0x20
>   [<ffffffff8173d4e4>] tracesys+0xdd/0xe2

The underlying problem is that perf is invoking call_rcu() with the
scheduler locks held, but in NOCB mode, call_rcu() will with high
probability invoke the scheduler -- which just might want to use its
locks.  The reason that call_rcu() needs to invoke the scheduler is
to wake up the corresponding rcuo callback-offload kthread, which
does the job of starting up a grace period and invoking the callbacks
afterwards.

One solution (championed on a related problem by Lai Jiangshan) is to
simply defer the wakeup to some point where scheduler locks are no
longer held.  Since we don't want to unnecessarily incur the cost of
such deferral, the task before us is threefold:

1.	Determine when it is likely that a relevant scheduler lock
	is held.

2.	Defer the wakeup in such cases.

3.	Ensure that all deferred wakeups eventually happen, preferably
	sooner rather than later.

We use irqs_disabled_flags() as a proxy for relevant scheduler locks
being held.  This works because the relevant locks are always acquired
with interrupts disabled.  We may defer more often than needed, but
that is at least safe.

The wakeup deferral is tracked via a new field in the per-CPU and
per-RCU-flavor rcu_data structure, namely ->nocb_defer_wakeup.

This flag is checked by the RCU core processing.  The __rcu_pending()
function now checks this flag, which causes rcu_check_callbacks()
to initiate RCU core processing at each scheduling-clock interrupt
where this flag is set.

Of course this is not sufficient because scheduling-clock interrupts
are often turned off (the things we used to be able to count on!).
So the flags are also checked on entry to any state that RCU considers
to be idle, which includes both NO_HZ_IDLE idle state and NO_HZ_FULL
user-mode-execution state.

This approach should allow call_rcu() to be invoked regardless of what
locks you might be holding, the key word being "should".

Reported-by: Dave Jones <davej@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
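For readers who want to see the defer-then-flush idea in isolation, here is a
minimal user-space sketch of the pattern the commit message describes.  It is
not kernel code and the names (enqueue(), do_deferred_wakeup(), worker(),
defer_wakeup, q_lock) are invented for illustration only; the real
implementation is the tree_plugin.h hunk further down, where the "unsafe
context" test is irqs_disabled_flags() and the worker is the rcuo kthread.

	/*
	 * User-space model of the deferred-wakeup pattern: if the enqueue
	 * path runs in an "unsafe" context, only record that a wakeup is
	 * owed; a later hook running in a safe context flushes it.
	 * Illustrative only -- not the kernel implementation.
	 */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t  q_wait = PTHREAD_COND_INITIALIZER;
	static int queued;		/* pending "callbacks" */
	static bool defer_wakeup;	/* analogue of ->nocb_defer_wakeup */

	/* Enqueue one item; @unsafe_ctx plays the role of irqs_disabled_flags(). */
	static void enqueue(bool unsafe_ctx)
	{
		pthread_mutex_lock(&q_lock);
		queued++;
		if (!unsafe_ctx)
			pthread_cond_signal(&q_wait);	/* wake the worker now */
		else
			defer_wakeup = true;		/* wake it later, safely */
		pthread_mutex_unlock(&q_lock);
	}

	/* Called from a known-safe context, e.g. periodically or before idling. */
	static void do_deferred_wakeup(void)
	{
		pthread_mutex_lock(&q_lock);
		if (defer_wakeup) {
			defer_wakeup = false;
			pthread_cond_signal(&q_wait);
		}
		pthread_mutex_unlock(&q_lock);
	}

	/* Worker thread: the analogue of the rcuo callback-offload kthread. */
	static void *worker(void *arg)
	{
		pthread_mutex_lock(&q_lock);
		while (queued == 0)
			pthread_cond_wait(&q_wait, &q_lock);
		printf("worker: processing %d item(s)\n", queued);
		queued = 0;
		pthread_mutex_unlock(&q_lock);
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, worker, NULL);
		enqueue(true);		/* "unsafe" caller: wakeup is deferred */
		do_deferred_wakeup();	/* safe context flushes the wakeup */
		pthread_join(t, NULL);
		return 0;
	}

The design point is the same as in the patch: deferral costs only a flag
store on the enqueue path, and correctness then depends on guaranteeing that
some safe context eventually flushes the flag, which the patch does from both
the RCU core processing path and the idle-entry path.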
-rw-r--r--	Documentation/RCU/trace.txt	20
-rw-r--r--	kernel/rcu/tree.c		24
-rw-r--r--	kernel/rcu/tree.h		9
-rw-r--r--	kernel/rcu/tree_plugin.h	55
-rw-r--r--	kernel/rcu/tree_trace.c		3
5 files changed, 86 insertions, 25 deletions
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index f3778f8952da..b8c3c813ea57 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -396,14 +396,14 @@ o Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node
 
 The output of "cat rcu/rcu_sched/rcu_pending" looks as follows:
 
-  0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903
-  1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113
-  2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889
-  3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469
-  4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042
-  5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422
-  6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699
-  7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147
+  0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 ndw=0
+  1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 ndw=0
+  2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 ndw=0
+  3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 ndw=0
+  4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 ndw=0
+  5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 ndw=0
+  6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 ndw=0
+  7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 ndw=0
 
 The fields are as follows:
 
@@ -432,6 +432,10 @@ o "gpc" is the number of times that an old grace period had
432o "gps" is the number of times that a new grace period had started, 432o "gps" is the number of times that a new grace period had started,
433 but this CPU was not yet aware of it. 433 but this CPU was not yet aware of it.
434 434
435o "ndw" is the number of times that a wakeup of an rcuo
436 callback-offload kthread had to be deferred in order to avoid
437 deadlock.
438
435o "nn" is the number of times that this CPU needed nothing. 439o "nn" is the number of times that this CPU needed nothing.
436 440
437 441
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index abef9c358d47..264f0284c0bd 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -369,6 +369,9 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 				 bool user)
 {
+	struct rcu_state *rsp;
+	struct rcu_data *rdp;
+
 	trace_rcu_dyntick(TPS("Start"), oldval, rdtp->dynticks_nesting);
 	if (!user && !is_idle_task(current)) {
 		struct task_struct *idle __maybe_unused =
@@ -380,6 +383,10 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
 			  current->pid, current->comm,
 			  idle->pid, idle->comm); /* must be idle task! */
 	}
+	for_each_rcu_flavor(rsp) {
+		rdp = this_cpu_ptr(rsp->rda);
+		do_nocb_deferred_wakeup(rdp);
+	}
 	rcu_prepare_for_idle(smp_processor_id());
 	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
 	smp_mb__before_atomic_inc();  /* See above. */
@@ -1928,13 +1935,13 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
  * Adopt the RCU callbacks from the specified rcu_state structure's
  * orphanage.  The caller must hold the ->orphan_lock.
  */
-static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
+static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
 {
 	int i;
 	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
 	/* No-CBs CPUs are handled specially. */
-	if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
+	if (rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
 		return;
 
 	/* Do the accounting first. */
@@ -2013,7 +2020,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
 	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
 	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
-	rcu_adopt_orphan_cbs(rsp);
+	rcu_adopt_orphan_cbs(rsp, flags);
 
 	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
 	mask = rdp->grpmask;	/* rnp->grplo is constant. */
@@ -2330,6 +2337,9 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 	/* If there are callbacks ready, invoke them. */
 	if (cpu_has_callbacks_ready_to_invoke(rdp))
 		invoke_rcu_callbacks(rsp, rdp);
+
+	/* Do any needed deferred wakeups of rcuo kthreads. */
+	do_nocb_deferred_wakeup(rdp);
 }
 
 /*
@@ -2464,7 +2474,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 
 		if (cpu != -1)
 			rdp = per_cpu_ptr(rsp->rda, cpu);
-		offline = !__call_rcu_nocb(rdp, head, lazy);
+		offline = !__call_rcu_nocb(rdp, head, lazy, flags);
 		WARN_ON_ONCE(offline);
 		/* _call_rcu() is illegal on offline CPU; leak the callback. */
 		local_irq_restore(flags);
@@ -2817,6 +2827,12 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
 		return 1;
 	}
 
+	/* Does this CPU need a deferred NOCB wakeup? */
+	if (rcu_nocb_need_deferred_wakeup(rdp)) {
+		rdp->n_rp_nocb_defer_wakeup++;
+		return 1;
+	}
+
 	/* nothing to do */
 	rdp->n_rp_need_nothing++;
 	return 0;
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 8e34d8674a4e..a87adfc2916b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -317,6 +317,7 @@ struct rcu_data {
 	unsigned long n_rp_cpu_needs_gp;
 	unsigned long n_rp_gp_completed;
 	unsigned long n_rp_gp_started;
+	unsigned long n_rp_nocb_defer_wakeup;
 	unsigned long n_rp_need_nothing;
 
 	/* 6) _rcu_barrier() and OOM callbacks. */
@@ -335,6 +336,7 @@ struct rcu_data {
 	int nocb_p_count_lazy;		/*  (approximate). */
 	wait_queue_head_t nocb_wq;	/* For nocb kthreads to sleep on. */
 	struct task_struct *nocb_kthread;
+	bool nocb_defer_wakeup;		/* Defer wakeup of nocb_kthread. */
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
 	/* 8) RCU CPU stall data. */
@@ -550,9 +552,12 @@ static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
 static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp);
 static void rcu_init_one_nocb(struct rcu_node *rnp);
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-			    bool lazy);
+			    bool lazy, unsigned long flags);
 static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-				      struct rcu_data *rdp);
+				      struct rcu_data *rdp,
+				      unsigned long flags);
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
 static void rcu_kick_nohz_cpu(int cpu);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index b023e5407111..752ffaa0d681 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -2104,7 +2104,8 @@ bool rcu_is_nocb_cpu(int cpu)
 static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 				    struct rcu_head *rhp,
 				    struct rcu_head **rhtp,
-				    int rhcount, int rhcount_lazy)
+				    int rhcount, int rhcount_lazy,
+				    unsigned long flags)
 {
 	int len;
 	struct rcu_head **old_rhpp;
@@ -2125,9 +2126,16 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
 	}
 	len = atomic_long_read(&rdp->nocb_q_count);
 	if (old_rhpp == &rdp->nocb_head) {
-		wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
+		if (!irqs_disabled_flags(flags)) {
+			wake_up(&rdp->nocb_wq); /* ... if queue was empty ... */
+			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+					    TPS("WakeEmpty"));
+		} else {
+			rdp->nocb_defer_wakeup = true;
+			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
+					    TPS("WakeEmptyIsDeferred"));
+		}
 		rdp->qlen_last_fqs_check = 0;
-		trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeEmpty"));
 	} else if (len > rdp->qlen_last_fqs_check + qhimark) {
 		wake_up_process(t); /* ... or if many callbacks queued. */
 		rdp->qlen_last_fqs_check = LONG_MAX / 2;
@@ -2148,12 +2156,12 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2148 * "rcuo" kthread can find it. 2156 * "rcuo" kthread can find it.
2149 */ 2157 */
2150static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, 2158static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2151 bool lazy) 2159 bool lazy, unsigned long flags)
2152{ 2160{
2153 2161
2154 if (!rcu_is_nocb_cpu(rdp->cpu)) 2162 if (!rcu_is_nocb_cpu(rdp->cpu))
2155 return 0; 2163 return 0;
2156 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy); 2164 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
2157 if (__is_kfree_rcu_offset((unsigned long)rhp->func)) 2165 if (__is_kfree_rcu_offset((unsigned long)rhp->func))
2158 trace_rcu_kfree_callback(rdp->rsp->name, rhp, 2166 trace_rcu_kfree_callback(rdp->rsp->name, rhp,
2159 (unsigned long)rhp->func, 2167 (unsigned long)rhp->func,
@@ -2171,7 +2179,8 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
  * not a no-CBs CPU.
  */
 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-						     struct rcu_data *rdp)
+						     struct rcu_data *rdp,
+						     unsigned long flags)
 {
 	long ql = rsp->qlen;
 	long qll = rsp->qlen_lazy;
@@ -2185,14 +2194,14 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 	/* First, enqueue the donelist, if any.  This preserves CB ordering. */
 	if (rsp->orphan_donelist != NULL) {
 		__call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
-					rsp->orphan_donetail, ql, qll);
+					rsp->orphan_donetail, ql, qll, flags);
 		ql = qll = 0;
 		rsp->orphan_donelist = NULL;
 		rsp->orphan_donetail = &rsp->orphan_donelist;
 	}
 	if (rsp->orphan_nxtlist != NULL) {
 		__call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
-					rsp->orphan_nxttail, ql, qll);
+					rsp->orphan_nxttail, ql, qll, flags);
 		ql = qll = 0;
 		rsp->orphan_nxtlist = NULL;
 		rsp->orphan_nxttail = &rsp->orphan_nxtlist;
@@ -2314,6 +2323,22 @@ static int rcu_nocb_kthread(void *arg)
 	return 0;
 }
 
+/* Is a deferred wakeup of rcu_nocb_kthread() required? */
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+{
+	return ACCESS_ONCE(rdp->nocb_defer_wakeup);
+}
+
+/* Do a deferred wakeup of rcu_nocb_kthread(). */
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+{
+	if (!rcu_nocb_need_deferred_wakeup(rdp))
+		return;
+	ACCESS_ONCE(rdp->nocb_defer_wakeup) = false;
+	wake_up(&rdp->nocb_wq);
+	trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty"));
+}
+
 /* Initialize per-rcu_data variables for no-CBs CPUs. */
 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
@@ -2369,13 +2394,14 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
 }
 
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
-			    bool lazy)
+			    bool lazy, unsigned long flags)
 {
 	return 0;
 }
 
 static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
-						     struct rcu_data *rdp)
+						     struct rcu_data *rdp,
+						     unsigned long flags)
 {
 	return 0;
 }
@@ -2384,6 +2410,15 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
 }
 
+static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
+{
+	return false;
+}
+
+static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
+{
+}
+
 static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
 {
 }
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
index 3596797b7e46..4def475336d4 100644
--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c
@@ -364,9 +364,10 @@ static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
 		   rdp->n_rp_report_qs,
 		   rdp->n_rp_cb_ready,
 		   rdp->n_rp_cpu_needs_gp);
-	seq_printf(m, "gpc=%ld gps=%ld nn=%ld\n",
+	seq_printf(m, "gpc=%ld gps=%ld nn=%ld ndw=%ld\n",
 		   rdp->n_rp_gp_completed,
 		   rdp->n_rp_gp_started,
+		   rdp->n_rp_nocb_defer_wakeup,
 		   rdp->n_rp_need_nothing);
 }
 